Provide a safe value for frames_per_block for all pycuda engines

ptycho · Nov 22, 2024 · f7141e8 · f7141e8
1 parent dd4a3db
commit f7141e8
Show file tree

Hide file tree

Showing 4 changed files with 62 additions and 3 deletions.
diff --git a/ptypy/accelerate/cuda_pycuda/engines/ML_pycuda.py b/ptypy/accelerate/cuda_pycuda/engines/ML_pycuda.py
@@ -21,6 +21,8 @@
 from ptypy import utils as u
 from ptypy.utils.verbose import logger, log
 from ptypy.utils import parallel
+from ptypy.accelerate.base.mem_utils import (max_fpb_from_scans,
+calculate_safe_fpb)
 from .. import get_context, get_dev_pool
 from ..kernels import PropagationKernel, RealSupportKernel, FourierSupportKernel
 from ..kernels import GradientDescentKernel, AuxiliaryWaveKernel, PoUpdateKernel, PositionCorrectionKernel
@@ -33,6 +35,8 @@
 
 MAX_BLOCKS = 99999  # can be used to limit the number of blocks, simulating that they don't fit
 #MAX_BLOCKS = 3  # can be used to limit the number of blocks, simulating that they don't fit
+# number of blocks the reconstruction is divided into when suggesting a safe 'frames_per_block'
+NUM_BLK_SAFE_FPB = 3
 
 @register()
 class ML_pycuda(ML_serial):
@@ -163,9 +167,21 @@ def _setup_kernels(self):
         ma_mem = mag_mem
         mem = cuda.mem_get_info()[0]
         blk = ma_mem + mag_mem
-        fit = int(mem - 200 * 1024 * 1024) // blk  # leave 200MB room for safety
+
+        # leave 200MB room for safety
+        avail_mem = max(int(mem - 200 * 1024 * 1024), 0)
+        fit =  avail_mem // blk
         if not fit:
             log(1,"Cannot fit memory into device, if possible reduce frames per block. Exiting...")
+            # max_fpb is None if there is a GradFull in the scan models
+            # as 'frames_per_block' is irrelevant
+            max_fpb = max_fpb_from_scans(self.ptycho.model.scans)
+            if max_fpb is not None:
+                per_frame = blk / max_fpb
+                safe_fpb = calculate_safe_fpb(avail_mem, per_frame, NUM_BLK_SAFE_FPB)
+                log(1,f"Your current 'frames_per_block' is {max_fpb}.")
+                log(1,f"With current reconstruction parameters and computing resources, you can try setting 'frames_per_block' to {safe_fpb}.")
+                log(1,f"This would divide your reonstruction into {NUM_BLK_SAFE_FPB} blocks.")
             raise SystemExit("ptypy has been exited.")
 
         # TODO grow blocks dynamically

diff --git a/ptypy/accelerate/cuda_pycuda/engines/projectional_pycuda.py b/ptypy/accelerate/cuda_pycuda/engines/projectional_pycuda.py
@@ -19,6 +19,7 @@
 from ptypy.engines import register
 from ptypy.engines.projectional import DMMixin, RAARMixin
 from ptypy.accelerate.base.engines import projectional_serial
+from ptypy.accelerate.base.mem_utils import calculate_safe_fpb
 from .. import get_context
 from ..kernels import FourierUpdateKernel, AuxiliaryWaveKernel, PoUpdateKernel, PositionCorrectionKernel
 from ..kernels import PropagationKernel, RealSupportKernel, FourierSupportKernel
@@ -30,6 +31,10 @@
 
 __all__ = ['DM_pycuda', 'RAAR_pycuda']
 
+# number of blocks the reconstruction is divided into when suggesting a safe 'frames_per_block'
+NUM_BLK_SAFE_FPB = 3
+
+
 class _ProjectionEngine_pycuda(projectional_serial._ProjectionEngine_serial):
 
     """
@@ -123,6 +128,13 @@ def _setup_kernels(self):
             mem = cuda.mem_get_info()[0]
             if not int(mem) // aux.nbytes:
                 log(1,"Cannot fit memory into device, if possible reduce frames per block or nr. of modes. Exiting...")
+                if scan.__class__.__name__ != "GradFull":
+                    # a 'frames_per_block' suggestion only makes sense if the model is not GradFull
+                    per_frame = (aux.nbytes / aux.shape[0]) * nmodes
+                    safe_fpb = calculate_safe_fpb(mem, per_frame, NUM_BLK_SAFE_FPB)
+                    log(1,f"Your current 'frames_per_block' is {fpc}.")
+                    log(1,f"With current reconstruction parameters and computing resources, you can try setting 'frames_per_block' to {safe_fpb}.")
+                    log(1,f"This would divide your reonstruction into {NUM_BLK_SAFE_FPB} blocks.")
                 raise SystemExit("ptypy has been exited.")
             kern.aux = gpuarray.to_gpu(aux)
 

diff --git a/ptypy/accelerate/cuda_pycuda/engines/projectional_pycuda_stream.py b/ptypy/accelerate/cuda_pycuda/engines/projectional_pycuda_stream.py
@@ -22,6 +22,8 @@
 from ptypy import utils as u
 from ptypy.utils.verbose import log, logger
 from ptypy.utils import parallel
+from ptypy.accelerate.base.mem_utils import (max_fpb_from_scans,
+calculate_safe_fpb)
 from ptypy.engines import register
 from ptypy.engines.projectional import DMMixin, RAARMixin
 from . import projectional_pycuda
@@ -32,6 +34,8 @@
 EX_MA_BLOCKS_RATIO = 2
 MAX_BLOCKS = 99999  # can be used to limit the number of blocks, simulating that they don't fit
 #MAX_BLOCKS = 3  # can be used to limit the number of blocks, simulating that they don't fit
+# number of blocks the reconstruction is divided into when suggesting a safe 'frames_per_block'
+NUM_BLK_SAFE_FPB = 3
 
 __all__ = ['DM_pycuda_stream', 'RAAR_pycuda_stream']
 
@@ -61,9 +65,21 @@ def _setup_kernels(self):
         ma_mem = mag_mem
         mem = cuda.mem_get_info()[0]
         blk = ex_mem * EX_MA_BLOCKS_RATIO + ma_mem + mag_mem
-        fit = int(mem - 200 * 1024 * 1024) // blk  # leave 200MB room for safety
+
+        # leave 200MB room for safety
+        avail_mem = max(int(mem - 200 * 1024 * 1024), 0)
+        fit =  avail_mem // blk
         if not fit:
             log(1,"Cannot fit memory into device, if possible reduce frames per block. Exiting...")
+            # max_fpb is None if there is a GradFull in the scan models
+            # as 'frames_per_block' is irrelevant
+            max_fpb = max_fpb_from_scans(self.ptycho.model.scans)
+            if max_fpb is not None:
+                per_frame = blk / max_fpb
+                safe_fpb = calculate_safe_fpb(avail_mem, per_frame, NUM_BLK_SAFE_FPB)
+                log(1,f"Your current 'frames_per_block' is {max_fpb}.")
+                log(1,f"With current reconstruction parameters and computing resources, you can try setting 'frames_per_block' to {safe_fpb}.")
+                log(1,f"This would divide your reonstruction into {NUM_BLK_SAFE_FPB} blocks.")
             raise SystemExit("ptypy has been exited.")
 
         # TODO grow blocks dynamically

diff --git a/ptypy/accelerate/cuda_pycuda/engines/stochastic.py b/ptypy/accelerate/cuda_pycuda/engines/stochastic.py
@@ -19,6 +19,8 @@
 from ptypy.engines import register
 from ptypy.engines.stochastic import EPIEMixin, SDRMixin
 from ptypy.accelerate.base.engines.stochastic import _StochasticEngineSerial
+from ptypy.accelerate.base.mem_utils import (max_fpb_from_scans,
+calculate_safe_fpb)
 from ptypy.accelerate.base import address_manglers
 from .. import get_context
 from ..kernels import FourierUpdateKernel, AuxiliaryWaveKernel, PoUpdateKernel,\
@@ -34,6 +36,8 @@
 EX_MA_BLOCKS_RATIO = 2
 MAX_BLOCKS = 99999  # can be used to limit the number of blocks, simulating that they don't fit
 #MAX_BLOCKS = 10  # can be used to limit the number of blocks, simulating that they don't fit
+# number of blocks the reconstruction is divided into when suggesting a safe 'frames_per_block'
+NUM_BLK_SAFE_FPB = 3
 
 class _StochasticEnginePycuda(_StochasticEngineSerial):
 
@@ -157,9 +161,20 @@ def _setup_kernels(self):
         ma_mem = mag_mem
         mem = cuda.mem_get_info()[0]
         blk = ex_mem * EX_MA_BLOCKS_RATIO + ma_mem + mag_mem
-        fit = int(mem - 200 * 1024 * 1024) // blk  # leave 200MB room for safety
+        # leave 200MB room for safety
+        avail_mem = max(int(mem - 200 * 1024 * 1024), 0)
+        fit =  avail_mem // blk
         if not fit:
             log(1,"Cannot fit memory into device, if possible reduce frames per block. Exiting...")
+            # max_fpb is None if there is a GradFull in the scan models
+            # as 'frames_per_block' is irrelevant
+            max_fpb = max_fpb_from_scans(self.ptycho.model.scans)
+            if max_fpb is not None:
+                per_frame = blk / max_fpb
+                safe_fpb = calculate_safe_fpb(avail_mem, per_frame, NUM_BLK_SAFE_FPB)
+                log(1,f"Your current 'frames_per_block' is {max_fpb}.")
+                log(1,f"With current reconstruction parameters and computing resources, you can try setting 'frames_per_block' to {safe_fpb}.")
+                log(1,f"This would divide your reonstruction into {NUM_BLK_SAFE_FPB} blocks.")
             raise SystemExit("ptypy has been exited.")
 
         # TODO grow blocks dynamically