diff --git a/ptypy/accelerate/cuda_cupy/engines/ML_cupy.py b/ptypy/accelerate/cuda_cupy/engines/ML_cupy.py
index cd68701a2..caa0a192d 100644
--- a/ptypy/accelerate/cuda_cupy/engines/ML_cupy.py
+++ b/ptypy/accelerate/cuda_cupy/engines/ML_cupy.py
@@ -267,7 +267,11 @@ def _get_smooth_gradient(self, data, sigma):
         if self.p.smooth_gradient_method == "convolution":
             if self.GSK.tmp is None:
                 self.GSK.tmp = cp.empty(data.shape, dtype=np.complex64)
-            self.GSK.convolution(data, [sigma, sigma], tmp=self.GSK.tmp)
+            try:
+                self.GSK.convolution(data, [sigma, sigma], tmp=self.GSK.tmp)
+            except MemoryError as err:  # re-raise with actionable advice; one message string, cause chained
+                raise RuntimeError("Convolution kernel too large for direct convolution on GPU. "
+                                   "Please reduce parameter smooth_gradient or set smooth_gradient_method='fft'.") from err
         elif self.p.smooth_gradient_method == "fft":
             self.FGSK.filter(data, sigma)
         else:
diff --git a/ptypy/accelerate/cuda_cupy/kernels.py b/ptypy/accelerate/cuda_cupy/kernels.py
index 118aadbe6..b743e9b08 100644
--- a/ptypy/accelerate/cuda_cupy/kernels.py
+++ b/ptypy/accelerate/cuda_cupy/kernels.py
@@ -171,7 +171,7 @@ def apply_real_support(self, x):
 
 
 class FFTFilterKernel:
-    def __init__(self, queue_thread=None, fft='cuda'):
+    def __init__(self, queue_thread=None, fft='cupy'):
         # Current implementation recompiles every time there is a change in input shape.
         self.queue = queue_thread
         self._fft_type = fft
diff --git a/ptypy/accelerate/cuda_pycuda/engines/ML_pycuda.py b/ptypy/accelerate/cuda_pycuda/engines/ML_pycuda.py
index b712f8974..d527d9f15 100644
--- a/ptypy/accelerate/cuda_pycuda/engines/ML_pycuda.py
+++ b/ptypy/accelerate/cuda_pycuda/engines/ML_pycuda.py
@@ -262,7 +262,11 @@ def _get_smooth_gradient(self, data, sigma):
         if self.p.smooth_gradient_method == "convolution":
             if self.GSK.tmp is None:
                 self.GSK.tmp = gpuarray.empty(data.shape, dtype=np.complex64)
-            self.GSK.convolution(data, [sigma, sigma], tmp=self.GSK.tmp)
+            try:
+                self.GSK.convolution(data, [sigma, sigma], tmp=self.GSK.tmp)
+            except MemoryError as err:  # re-raise with actionable advice; one message string, cause chained
+                raise RuntimeError("Convolution kernel too large for direct convolution on GPU. "
+                                   "Please reduce parameter smooth_gradient or set smooth_gradient_method='fft'.") from err
         elif self.p.smooth_gradient_method == "fft":
             self.FGSK.filter(data, sigma)
         else: