diff --git a/ptypy/accelerate/cuda_common/batched_multiply.cu b/ptypy/accelerate/cuda_common/batched_multiply.cu
index f91bb6d38..11394f68c 100644
--- a/ptypy/accelerate/cuda_common/batched_multiply.cu
+++ b/ptypy/accelerate/cuda_common/batched_multiply.cu
@@ -22,7 +22,7 @@ extern "C" __global__ void batched_multiply(const complex<IN_TYPE>* input,
   int gy = threadIdx.y + blockIdx.y * blockDim.y;
   int gz = threadIdx.z + blockIdx.z * blockDim.z;
 
-  if (gx > columns || gy > rows || gz > nBatches)
+  if (gx >= rows || gy >= columns || gz >= nBatches)  // 0-based: index == extent is out of range
     return;
 
   auto val = input[gz * rows * columns + gy * rows + gx];
diff --git a/test/accelerate_tests/cuda_cupy_tests/fft_scaling_test.py b/test/accelerate_tests/cuda_cupy_tests/fft_scaling_test.py
index 96cd12560..80cc2246e 100644
--- a/test/accelerate_tests/cuda_cupy_tests/fft_scaling_test.py
+++ b/test/accelerate_tests/cuda_cupy_tests/fft_scaling_test.py
@@ -39,16 +39,19 @@ def get_reverse_cuFFT(f, stream,
 
 class FftScalingTest(CupyCudaTest):
 
-    def get_input(self, size):
+    def get_input(self, size, squared=True):
         rows = cols = size
+        if not squared:
+            cols += 2
         batches = 1
         f = np.ones(shape=(batches, rows, cols), dtype=COMPLEX_TYPE)
         return f
 
     #### Trivial foward transform tests ####
 
-    def fwd_test(self, symmetric, factory, preffact=None, postfact=None, external=True, size=32):
-        f = self.get_input(size)
+    def fwd_test(self, symmetric, factory, preffact=None, postfact=None, external=True, 
+                 size=32, squared=True, decimal=6):
+        f = self.get_input(size, squared=squared)
         f_d = cp.asarray(f)
         if preffact is not None:
             pref = preffact * np.ones(shape=f.shape[-2:], dtype=np.complex64)
@@ -70,8 +73,8 @@ def fwd_test(self, symmetric, factory, preffact=None, postfact=None, external=Tr
         elements = f.shape[-2] * f.shape[-1]
         scale = 1.0 if not symmetric else 1.0 / np.sqrt(elements)
         expected = elements * scale * preffact * postfact
-        self.assertAlmostEqual(f_back[0,0,0], expected)
-        np.testing.assert_array_almost_equal(f_back.flat[1:], 0, decimal=5)
+        np.testing.assert_almost_equal(f_back[0,0,0], expected, decimal=decimal)  # complex compare: imag must be ~0 too
+        np.testing.assert_array_almost_equal(f_back.flat[1:], 0, decimal=decimal)
 
     def test_fwd_noscale_cufft(self):
         self.fwd_test(False, get_forward_cuFFT)
@@ -122,33 +125,58 @@ def test_prepostfilt_fwd_scale_cufft_cupy(self):
         self.fwd_test(True, get_forward_cuFFT, postfact=2.0, preffact=1.5, external=False)
 
     def test_fwd_not_power_two_noscale_cufft_cupy(self):
-        self.fwd_test(False, get_forward_cuFFT, external=False, size=20)
+        self.fwd_test(False, get_forward_cuFFT, external=False, size=20, decimal=5)
 
     def test_fwd_not_power_two_scale_cufft_cupy(self):
         self.fwd_test(True, get_forward_cuFFT, external=False, size=20)
 
     def test_prefilt_fwd_not_power_two_noscale_cufft_cupy(self):
-        self.fwd_test(False, get_forward_cuFFT, preffact=2.0, external=False, size=20)
+        self.fwd_test(False, get_forward_cuFFT, preffact=2.0, external=False, size=20, decimal=5)
 
     def test_prefilt_fwd_not_power_two_scale_cufft_cupy(self):
         self.fwd_test(True, get_forward_cuFFT, preffact=2.0, external=False, size=20)
 
     def test_postfilt_fwd_not_power_two_noscale_cufft_cupy(self):
-        self.fwd_test(False, get_forward_cuFFT, postfact=2.0, external=False, size=20)
+        self.fwd_test(False, get_forward_cuFFT, postfact=2.0, external=False, size=20, decimal=5)
 
     def test_postfilt_fwd_not_power_two_scale_cufft_cupy(self):
         self.fwd_test(True, get_forward_cuFFT, postfact=2.0, external=False, size=20)
 
     def test_prepostfilt_not_power_two_fwd_noscale_cufft_cupy(self):
-        self.fwd_test(False, get_forward_cuFFT, postfact=2.0, preffact=1.5, external=False, size=20)
+        self.fwd_test(False, get_forward_cuFFT, postfact=2.0, preffact=1.5, external=False, size=20, decimal=5)
 
     def test_prepostfilt_not_power_two_fwd_scale_cufft_cupy(self):
         self.fwd_test(True, get_forward_cuFFT, postfact=2.0, preffact=1.5, external=False, size=20)
+
+    def test_fwd_not_power_two_not_squared_noscale_cufft_cupy(self):
+        self.fwd_test(False, get_forward_cuFFT, external=False, size=20, squared=False, decimal=5)
+
+    def test_fwd_not_power_two_not_squared_scale_cufft_cupy(self):
+        self.fwd_test(True, get_forward_cuFFT, external=False, size=20, squared=False)
+
+    def test_prefilt_fwd_not_power_two_not_squared_noscale_cufft_cupy(self):
+        self.fwd_test(False, get_forward_cuFFT, preffact=2.0, external=False, size=20, squared=False, decimal=4)
+
+    def test_prefilt_fwd_not_power_two_not_squared_scale_cufft_cupy(self):
+        self.fwd_test(True, get_forward_cuFFT, preffact=2.0, external=False, size=20, squared=False)
+
+    def test_postfilt_fwd_not_power_two_not_squared_noscale_cufft_cupy(self):
+        self.fwd_test(False, get_forward_cuFFT, postfact=2.0, external=False, size=20, squared=False, decimal=4)
+
+    def test_postfilt_fwd_not_power_two_not_squared_scale_cufft_cupy(self):
+        self.fwd_test(True, get_forward_cuFFT, postfact=2.0, external=False, size=20, squared=False)
+
+    def test_prepostfilt_not_power_two_not_squared_fwd_noscale_cufft_cupy(self):
+        self.fwd_test(False, get_forward_cuFFT, postfact=2.0, preffact=1.5, external=False, size=20, squared=False, decimal=4)
+
+    def test_prepostfilt_not_power_two_not_squared_fwd_scale_cufft_cupy(self):
+        self.fwd_test(True, get_forward_cuFFT, postfact=2.0, preffact=1.5, external=False, size=20, squared=False, decimal=5)
         
     ############# Trivial inverse transform tests #########
 
-    def rev_test(self, symmetric, factory, preffact=None, postfact=None, external=True, size=32):
-        f = self.get_input(size)
+    def rev_test(self, symmetric, factory, preffact=None, postfact=None, external=True, 
+                 size=32, squared=True, decimal=6):
+        f = self.get_input(size, squared=squared)
         f_d = cp.asarray(f)
         if preffact is not None:
             pref = preffact * np.ones(shape=f.shape[-2:], dtype=np.complex64)
@@ -170,8 +198,8 @@ def rev_test(self, symmetric, factory, preffact=None, postfact=None, external=Tr
         elements = f.shape[-2] * f.shape[-1]
         scale = 1.0 if not symmetric else np.sqrt(elements)
         expected = scale * preffact * postfact
-        self.assertAlmostEqual(f_back[0,0,0], expected)
-        np.testing.assert_array_almost_equal(f_back.flat[1:], 0, decimal=5)
+        np.testing.assert_almost_equal(f_back[0,0,0], expected, decimal=decimal)  # complex compare: imag must be ~0 too
+        np.testing.assert_array_almost_equal(f_back.flat[1:], 0, decimal=decimal)
 
 
     def test_rev_noscale_cufft(self):
@@ -246,5 +274,29 @@ def test_prepostfilt_rev_not_power_two_noscale_cufft_cupy(self):
     def test_prepostfilt_rev_not_power_two_scale_cufft_cupy(self):
         self.rev_test(True, get_reverse_cuFFT, postfact=1.5, preffact=2.0, external=False, size=20)
 
+    def test_rev_not_power_two_not_squared_noscale_cufft_cupy(self):
+        self.rev_test(False, get_reverse_cuFFT, external=False, size=20, squared=False)
+
+    def test_rev_not_power_two_not_squared_scale_cufft_cupy(self):
+        self.rev_test(True, get_reverse_cuFFT, external=False, size=20, squared=False)
+
+    def test_prefilt_rev_not_power_two_not_squared_noscale_cufft_cupy(self):
+        self.rev_test(False, get_reverse_cuFFT, preffact=1.5, external=False, size=20, squared=False)
+
+    def test_prefilt_rev_not_power_two_not_squared_scale_cufft_cupy(self):
+        self.rev_test(True, get_reverse_cuFFT, preffact=1.5, external=False, size=20, squared=False, decimal=5)
+
+    def test_postfilt_rev_not_power_two_not_squared_noscale_cufft_cupy(self):
+        self.rev_test(False, get_reverse_cuFFT, postfact=1.5, external=False, size=20, squared=False)
+
+    def test_postfilt_rev_not_power_two_not_squared_scale_cufft_cupy(self):
+        self.rev_test(True, get_reverse_cuFFT, postfact=1.5, external=False, size=20, squared=False)
+
+    def test_prepostfilt_rev_not_power_not_squared_two_noscale_cufft_cupy(self):
+        self.rev_test(False, get_reverse_cuFFT, postfact=1.5, preffact=2.0, external=False, size=20, squared=False)
+
+    def test_prepostfilt_rev_not_power_not_squared_two_scale_cufft_cupy(self):
+        self.rev_test(True, get_reverse_cuFFT, postfact=1.5, preffact=2.0, external=False, size=20, squared=False)
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/accelerate_tests/cuda_pycuda_tests/fft_scaling_test.py b/test/accelerate_tests/cuda_pycuda_tests/fft_scaling_test.py
index b16a92902..18a02ef1e 100644
--- a/test/accelerate_tests/cuda_pycuda_tests/fft_scaling_test.py
+++ b/test/accelerate_tests/cuda_pycuda_tests/fft_scaling_test.py
@@ -52,16 +52,19 @@ def get_reverse_Reikna(f, stream,
 
 class FftScalingTest(PyCudaTest):
 
-    def get_input(self):
-        rows = cols = 32
+    def get_input(self, size, squared=True):
+        rows = cols = size
+        if not squared:
+            cols += 2
         batches = 1
         f = np.ones(shape=(batches, rows, cols), dtype=COMPLEX_TYPE)
         return f
 
     #### Trivial foward transform tests ####
 
-    def fwd_test(self, symmetric, factory, preffact=None, postfact=None, external=True):
-        f = self.get_input()
+    def fwd_test(self, symmetric, factory, preffact=None, postfact=None, external=True, 
+                 size=32, squared=True, decimal=6):
+        f = self.get_input(size, squared=squared)
         f_d = gpuarray.to_gpu(f)
         if preffact is not None:
             pref = preffact * np.ones(shape=f.shape[-2:], dtype=np.complex64)
@@ -83,8 +86,8 @@ def fwd_test(self, symmetric, factory, preffact=None, postfact=None, external=Tr
         elements = f.shape[-2] * f.shape[-1]
         scale = 1.0 if not symmetric else 1.0 / np.sqrt(elements)
         expected = elements * scale * preffact * postfact
-        self.assertAlmostEqual(f_back[0,0,0], expected)
-        np.testing.assert_array_almost_equal(f_back.flat[1:], 0)
+        np.testing.assert_almost_equal(f_back[0,0,0], expected, decimal=decimal)  # complex compare: imag must be ~0 too
+        np.testing.assert_array_almost_equal(f_back.flat[1:], 0, decimal=decimal)
 
     def test_fwd_noscale_reikna(self):
         self.fwd_test(False, get_forward_Reikna)
@@ -166,11 +169,59 @@ def test_prepostfilt_fwd_scale_cufft(self):
     def test_prepostfilt_fwd_scale_cufft_skcuda(self):
         self.fwd_test(True, get_forward_cuFFT, postfact=2.0, preffact=1.5, external=False)
 
+    def test_fwd_not_power_two_noscale_reikna(self):
+        self.fwd_test(False, get_forward_Reikna, size=20, decimal=4)
+
+    def test_fwd_not_power_two_scale_reikna(self):
+        self.fwd_test(True, get_forward_Reikna, size=20, decimal=4)
+
+    def test_prefilt_fwd_not_power_two_noscale_reikna(self):
+        self.fwd_test(False, get_forward_Reikna, preffact=2.0, size=20, decimal=4)
+
+    def test_prefilt_fwd_not_power_two_scale_reikna(self):
+        self.fwd_test(True, get_forward_Reikna, preffact=2.0, size=20, decimal=4)
+
+    def test_postfilt_fwd_not_power_two_noscale_reikna(self):
+        self.fwd_test(False, get_forward_Reikna, postfact=2.0, size=20, decimal=4)
+
+    def test_postfilt_fwd_not_power_two_scale_reikna(self):
+        self.fwd_test(True, get_forward_Reikna, postfact=2.0, size=20, decimal=4)
+
+    def test_prepostfilt_fwd_not_power_two_noscale_reikna(self):
+        self.fwd_test(False, get_forward_Reikna, postfact=2.0, preffact=1.5, size=20, decimal=4)
+
+    def test_prepostfilt_fwd_not_power_two_scale_reikna(self):
+        self.fwd_test(True, get_forward_Reikna, postfact=2.0, preffact=1.5, size=20, decimal=4)
+
+    def test_fwd_not_power_two_noscale_not_squared_reikna(self):
+        self.fwd_test(False, get_forward_Reikna, size=20, squared=False, decimal=4)
+
+    def test_fwd_not_power_two_not_squared_scale_reikna(self):
+        self.fwd_test(True, get_forward_Reikna, size=20, squared=False, decimal=4)
+
+    def test_prefilt_fwd_not_power_two_not_squared_noscale_reikna(self):
+        self.fwd_test(False, get_forward_Reikna, preffact=2.0, size=20, squared=False, decimal=4)
+
+    def test_prefilt_fwd_not_power_two_not_squared_scale_reikna(self):
+        self.fwd_test(True, get_forward_Reikna, preffact=2.0, size=20, squared=False, decimal=4)
+
+    def test_postfilt_fwd_not_power_two_not_squared_noscale_reikna(self):
+        self.fwd_test(False, get_forward_Reikna, postfact=2.0, size=20, squared=False, decimal=4)
+
+    def test_postfilt_fwd_not_power_two_not_squared_scale_reikna(self):
+        self.fwd_test(True, get_forward_Reikna, postfact=2.0, size=20, squared=False, decimal=4)
+
+    def test_prepostfilt_fwd_not_power_two_not_squared_noscale_reikna(self):
+        self.fwd_test(False, get_forward_Reikna, postfact=2.0, preffact=1.5, size=20, squared=False, decimal=4)
+
+    def test_prepostfilt_fwd_not_power_two_not_squared_scale_reikna(self):
+        self.fwd_test(True, get_forward_Reikna, postfact=2.0, preffact=1.5, size=20, squared=False, decimal=4)
 
     ############# Trivial inverse transform tests #########
 
-    def rev_test(self, symmetric, factory, preffact=None, postfact=None, external=True):
-        f = self.get_input()
+    def rev_test(self, symmetric, factory, preffact=None, postfact=None, external=True, 
+                 size=32, squared=True, decimal=6):
+        f = self.get_input(size, squared=squared)
         f_d = gpuarray.to_gpu(f)
         if preffact is not None:
             pref = preffact * np.ones(shape=f.shape[-2:], dtype=np.complex64)
@@ -192,8 +243,8 @@ def rev_test(self, symmetric, factory, preffact=None, postfact=None, external=Tr
         elements = f.shape[-2] * f.shape[-1]
         scale = 1.0 if not symmetric else np.sqrt(elements)
         expected = scale * preffact * postfact
-        self.assertAlmostEqual(f_back[0,0,0], expected)
-        np.testing.assert_array_almost_equal(f_back.flat[1:], 0)
+        np.testing.assert_almost_equal(f_back[0,0,0], expected, decimal=decimal)  # complex compare: imag must be ~0 too
+        np.testing.assert_array_almost_equal(f_back.flat[1:], 0, decimal=decimal)
 
 
     def test_rev_noscale_reikna(self):
@@ -276,6 +327,54 @@ def test_prepostfilt_rev_scale_cufft(self):
     def test_prepostfilt_rev_scale_cufft_skcuda(self):
         self.rev_test(True, get_reverse_cuFFT, postfact=1.5, preffact=2.0, external=False)
 
+    def test_rev_not_power_two_noscale_reikna(self):
+        self.rev_test(False, get_reverse_Reikna, size=20)
+
+    def test_rev_not_power_two_scale_reikna(self):
+        self.rev_test(True, get_reverse_Reikna, size=20, decimal=5)
+
+    def test_prefilt_rev_not_power_two_noscale_reikna(self):
+        self.rev_test(False, get_reverse_Reikna, preffact=1.5, size=20)
+
+    def test_prefilt_rev_not_power_two_scale_reikna(self):
+        self.rev_test(True, get_reverse_Reikna, preffact=1.5, size=20, decimal=5)
+
+    def test_postfilt_rev_not_power_two_noscale_reikna(self):
+        self.rev_test(False, get_reverse_Reikna, postfact=1.5, size=20)
+
+    def test_postfilt_rev_not_power_two_scale_reikna(self):
+        self.rev_test(True, get_reverse_Reikna, postfact=1.5, size=20, decimal=5)
+
+    def test_prepostfilt_rev_not_power_two_noscale_reikna(self):
+        self.rev_test(False, get_reverse_Reikna, postfact=1.5, preffact=2.0, size=20)
+
+    def test_prepostfilt_rev_not_power_two_scale_reikna(self):
+        self.rev_test(True, get_reverse_Reikna, postfact=1.5, preffact=2.0, size=20, decimal=5)
+
+    def test_rev_not_power_two_not_squared_noscale_reikna(self):
+        self.rev_test(False, get_reverse_Reikna, size=20, squared=False)
+
+    def test_rev_not_power_two_not_squared_scale_reikna(self):
+        self.rev_test(True, get_reverse_Reikna, size=20, squared=False, decimal=5)
+
+    def test_prefilt_rev_not_power_two_not_squared_noscale_reikna(self):
+        self.rev_test(False, get_reverse_Reikna, preffact=1.5, size=20, squared=False)
+
+    def test_prefilt_rev_not_power_two_not_squared_scale_reikna(self):
+        self.rev_test(True, get_reverse_Reikna, preffact=1.5, size=20, squared=False, decimal=5)
+
+    def test_postfilt_rev_not_power_two_not_squared_noscale_reikna(self):
+        self.rev_test(False, get_reverse_Reikna, postfact=1.5, size=20, squared=False)
+
+    def test_postfilt_rev_not_power_two_not_squared_scale_reikna(self):
+        self.rev_test(True, get_reverse_Reikna, postfact=1.5, size=20, squared=False, decimal=5)
+
+    def test_prepostfilt_rev_not_power_two_not_squared_noscale_reikna(self):
+        self.rev_test(False, get_reverse_Reikna, postfact=1.5, preffact=2.0, size=20, squared=False)
+
+    def test_prepostfilt_rev_not_power_two_not_squared_scale_reikna(self):
+        self.rev_test(True, get_reverse_Reikna, postfact=1.5, preffact=2.0, size=20, squared=False, decimal=5)
+
 
 if __name__ == '__main__':
     unittest.main()