Patch notes (free text ahead of the first file header):

* ptypy/accelerate/cuda_common/batched_multiply.cu
  The early-return guard now compares gx against rows and gy against
  columns, which matches the addressing used right below it
  (gz * rows * columns + gy * rows + gx).  The comparison is ">=" rather
  than ">" because an index equal to the extent is already outside the
  valid 0 .. extent-1 range; with ">" such a thread would fall through
  and touch a neighbouring or out-of-range element.

* test/accelerate_tests/cuda_cupy_tests/fft_scaling_test.py
* test/accelerate_tests/cuda_pycuda_tests/fft_scaling_test.py
  get_input() takes a "squared" flag; when it is False the input gets
  two extra columns (cols = size + 2).  fwd_test()/rev_test() take
  "size", "squared" and "decimal" and forward "decimal" to both numpy
  assertions.  New cases cover size=20 inputs, square and non-square.
  NOTE(review): the first assertion now checks only the real part of
  f_back[0,0,0]; its imaginary part is no longer checked anywhere.

diff --git a/ptypy/accelerate/cuda_common/batched_multiply.cu b/ptypy/accelerate/cuda_common/batched_multiply.cu
index f91bb6d38..11394f68c 100644
--- a/ptypy/accelerate/cuda_common/batched_multiply.cu
+++ b/ptypy/accelerate/cuda_common/batched_multiply.cu
@@ -22,7 +22,7 @@ extern "C" __global__ void batched_multiply(const complex* input,
   int gy = threadIdx.y + blockIdx.y * blockDim.y;
   int gz = threadIdx.z + blockIdx.z * blockDim.z;
 
-  if (gx > columns || gy > rows || gz > nBatches)
+  if (gx >= rows || gy >= columns || gz >= nBatches)
     return;
 
   auto val = input[gz * rows * columns + gy * rows + gx];
diff --git a/test/accelerate_tests/cuda_cupy_tests/fft_scaling_test.py b/test/accelerate_tests/cuda_cupy_tests/fft_scaling_test.py
index 96cd12560..80cc2246e 100644
--- a/test/accelerate_tests/cuda_cupy_tests/fft_scaling_test.py
+++ b/test/accelerate_tests/cuda_cupy_tests/fft_scaling_test.py
@@ -39,16 +39,19 @@ def get_reverse_cuFFT(f, stream,
 
 class FftScalingTest(CupyCudaTest):
 
-    def get_input(self, size):
+    def get_input(self, size, squared=True):
         rows = cols = size
+        if not squared:
+            cols += 2
         batches = 1
         f = np.ones(shape=(batches, rows, cols), dtype=COMPLEX_TYPE)
         return f
 
     #### Trivial foward transform tests ####
 
-    def fwd_test(self, symmetric, factory, preffact=None, postfact=None, external=True, size=32):
-        f = self.get_input(size)
+    def fwd_test(self, symmetric, factory, preffact=None, postfact=None, external=True,
+                 size=32, squared=True, decimal=6):
+        f = self.get_input(size, squared=squared)
         f_d = cp.asarray(f)
         if preffact is not None:
             pref = preffact * np.ones(shape=f.shape[-2:], dtype=np.complex64)
@@ -70,8 +73,8 @@ def fwd_test(self, symmetric, factory, preffact=None, postfact=None, external=Tr
         elements = f.shape[-2] * f.shape[-1]
         scale = 1.0 if not symmetric else 1.0 / np.sqrt(elements)
         expected = elements * scale * preffact * postfact
-        self.assertAlmostEqual(f_back[0,0,0], expected)
-        np.testing.assert_array_almost_equal(f_back.flat[1:], 0, decimal=5)
+        np.testing.assert_almost_equal(f_back[0,0,0].real, expected, decimal=decimal)
+        np.testing.assert_array_almost_equal(f_back.flat[1:], 0, decimal=decimal)
 
     def test_fwd_noscale_cufft(self):
         self.fwd_test(False, get_forward_cuFFT)
@@ -122,33 +125,58 @@ def test_prepostfilt_fwd_scale_cufft_cupy(self):
         self.fwd_test(True, get_forward_cuFFT, postfact=2.0, preffact=1.5, external=False)
 
     def test_fwd_not_power_two_noscale_cufft_cupy(self):
-        self.fwd_test(False, get_forward_cuFFT, external=False, size=20)
+        self.fwd_test(False, get_forward_cuFFT, external=False, size=20, decimal=5)
 
     def test_fwd_not_power_two_scale_cufft_cupy(self):
         self.fwd_test(True, get_forward_cuFFT, external=False, size=20)
 
     def test_prefilt_fwd_not_power_two_noscale_cufft_cupy(self):
-        self.fwd_test(False, get_forward_cuFFT, preffact=2.0, external=False, size=20)
+        self.fwd_test(False, get_forward_cuFFT, preffact=2.0, external=False, size=20, decimal=5)
 
     def test_prefilt_fwd_not_power_two_scale_cufft_cupy(self):
         self.fwd_test(True, get_forward_cuFFT, preffact=2.0, external=False, size=20)
 
     def test_postfilt_fwd_not_power_two_noscale_cufft_cupy(self):
-        self.fwd_test(False, get_forward_cuFFT, postfact=2.0, external=False, size=20)
+        self.fwd_test(False, get_forward_cuFFT, postfact=2.0, external=False, size=20, decimal=5)
 
     def test_postfilt_fwd_not_power_two_scale_cufft_cupy(self):
         self.fwd_test(True, get_forward_cuFFT, postfact=2.0, external=False, size=20)
 
     def test_prepostfilt_not_power_two_fwd_noscale_cufft_cupy(self):
-        self.fwd_test(False, get_forward_cuFFT, postfact=2.0, preffact=1.5, external=False, size=20)
+        self.fwd_test(False, get_forward_cuFFT, postfact=2.0, preffact=1.5, external=False, size=20, decimal=5)
 
     def test_prepostfilt_not_power_two_fwd_scale_cufft_cupy(self):
         self.fwd_test(True, get_forward_cuFFT, postfact=2.0, preffact=1.5, external=False, size=20)
+
+    def test_fwd_not_power_two_not_squared_noscale_cufft_cupy(self):
+        self.fwd_test(False, get_forward_cuFFT, external=False, size=20, squared=False, decimal=5)
+
+    def test_fwd_not_power_two_not_squared_scale_cufft_cupy(self):
+        self.fwd_test(True, get_forward_cuFFT, external=False, size=20, squared=False)
+
+    def test_prefilt_fwd_not_power_two_not_squared_noscale_cufft_cupy(self):
+        self.fwd_test(False, get_forward_cuFFT, preffact=2.0, external=False, size=20, squared=False, decimal=4)
+
+    def test_prefilt_fwd_not_power_two_not_squared_scale_cufft_cupy(self):
+        self.fwd_test(True, get_forward_cuFFT, preffact=2.0, external=False, size=20, squared=False)
+
+    def test_postfilt_fwd_not_power_two_not_squared_noscale_cufft_cupy(self):
+        self.fwd_test(False, get_forward_cuFFT, postfact=2.0, external=False, size=20, squared=False, decimal=4)
+
+    def test_postfilt_fwd_not_power_two_not_squared_scale_cufft_cupy(self):
+        self.fwd_test(True, get_forward_cuFFT, postfact=2.0, external=False, size=20, squared=False)
+
+    def test_prepostfilt_not_power_two_not_squared_fwd_noscale_cufft_cupy(self):
+        self.fwd_test(False, get_forward_cuFFT, postfact=2.0, preffact=1.5, external=False, size=20, squared=False, decimal=4)
+
+    def test_prepostfilt_not_power_two_not_squared_fwd_scale_cufft_cupy(self):
+        self.fwd_test(True, get_forward_cuFFT, postfact=2.0, preffact=1.5, external=False, size=20, squared=False, decimal=5)
 
     ############# Trivial inverse transform tests #########
 
-    def rev_test(self, symmetric, factory, preffact=None, postfact=None, external=True, size=32):
-        f = self.get_input(size)
+    def rev_test(self, symmetric, factory, preffact=None, postfact=None, external=True,
+                 size=32, squared=True, decimal=6):
+        f = self.get_input(size, squared=squared)
         f_d = cp.asarray(f)
         if preffact is not None:
             pref = preffact * np.ones(shape=f.shape[-2:], dtype=np.complex64)
@@ -170,8 +198,8 @@ def rev_test(self, symmetric, factory, preffact=None, postfact=None, external=Tr
         elements = f.shape[-2] * f.shape[-1]
         scale = 1.0 if not symmetric else np.sqrt(elements)
         expected = scale * preffact * postfact
-        self.assertAlmostEqual(f_back[0,0,0], expected)
-        np.testing.assert_array_almost_equal(f_back.flat[1:], 0, decimal=5)
+        np.testing.assert_almost_equal(f_back[0,0,0].real, expected, decimal=decimal)
+        np.testing.assert_array_almost_equal(f_back.flat[1:], 0, decimal=decimal)
 
 
     def test_rev_noscale_cufft(self):
@@ -246,5 +274,29 @@ def test_prepostfilt_rev_not_power_two_noscale_cufft_cupy(self):
     def test_prepostfilt_rev_not_power_two_scale_cufft_cupy(self):
         self.rev_test(True, get_reverse_cuFFT, postfact=1.5, preffact=2.0, external=False, size=20)
 
+    def test_rev_not_power_two_not_squared_noscale_cufft_cupy(self):
+        self.rev_test(False, get_reverse_cuFFT, external=False, size=20, squared=False)
+
+    def test_rev_not_power_two_not_squared_scale_cufft_cupy(self):
+        self.rev_test(True, get_reverse_cuFFT, external=False, size=20, squared=False)
+
+    def test_prefilt_rev_not_power_two_not_squared_noscale_cufft_cupy(self):
+        self.rev_test(False, get_reverse_cuFFT, preffact=1.5, external=False, size=20, squared=False)
+
+    def test_prefilt_rev_not_power_two_not_squared_scale_cufft_cupy(self):
+        self.rev_test(True, get_reverse_cuFFT, preffact=1.5, external=False, size=20, squared=False, decimal=5)
+
+    def test_postfilt_rev_not_power_two_not_squared_noscale_cufft_cupy(self):
+        self.rev_test(False, get_reverse_cuFFT, postfact=1.5, external=False, size=20, squared=False)
+
+    def test_postfilt_rev_not_power_two_not_squared_scale_cufft_cupy(self):
+        self.rev_test(True, get_reverse_cuFFT, postfact=1.5, external=False, size=20, squared=False)
+
+    def test_prepostfilt_rev_not_power_not_squared_two_noscale_cufft_cupy(self):
+        self.rev_test(False, get_reverse_cuFFT, postfact=1.5, preffact=2.0, external=False, size=20, squared=False)
+
+    def test_prepostfilt_rev_not_power_not_squared_two_scale_cufft_cupy(self):
+        self.rev_test(True, get_reverse_cuFFT, postfact=1.5, preffact=2.0, external=False, size=20, squared=False)
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/accelerate_tests/cuda_pycuda_tests/fft_scaling_test.py b/test/accelerate_tests/cuda_pycuda_tests/fft_scaling_test.py
index b16a92902..18a02ef1e 100644
--- a/test/accelerate_tests/cuda_pycuda_tests/fft_scaling_test.py
+++ b/test/accelerate_tests/cuda_pycuda_tests/fft_scaling_test.py
@@ -52,16 +52,19 @@ def get_reverse_Reikna(f, stream,
 
 class FftScalingTest(PyCudaTest):
 
-    def get_input(self):
-        rows = cols = 32
+    def get_input(self, size, squared=True):
+        rows = cols = size
+        if not squared:
+            cols += 2
         batches = 1
         f = np.ones(shape=(batches, rows, cols), dtype=COMPLEX_TYPE)
         return f
 
     #### Trivial foward transform tests ####
 
-    def fwd_test(self, symmetric, factory, preffact=None, postfact=None, external=True):
-        f = self.get_input()
+    def fwd_test(self, symmetric, factory, preffact=None, postfact=None, external=True,
+                 size=32, squared=True, decimal=6):
+        f = self.get_input(size, squared=squared)
         f_d = gpuarray.to_gpu(f)
         if preffact is not None:
             pref = preffact * np.ones(shape=f.shape[-2:], dtype=np.complex64)
@@ -83,8 +86,8 @@ def fwd_test(self, symmetric, factory, preffact=None, postfact=None, external=Tr
         elements = f.shape[-2] * f.shape[-1]
         scale = 1.0 if not symmetric else 1.0 / np.sqrt(elements)
         expected = elements * scale * preffact * postfact
-        self.assertAlmostEqual(f_back[0,0,0], expected)
-        np.testing.assert_array_almost_equal(f_back.flat[1:], 0)
+        np.testing.assert_almost_equal(f_back[0,0,0].real, expected, decimal=decimal)
+        np.testing.assert_array_almost_equal(f_back.flat[1:], 0, decimal=decimal)
 
     def test_fwd_noscale_reikna(self):
         self.fwd_test(False, get_forward_Reikna)
@@ -166,11 +169,59 @@ def test_prepostfilt_fwd_scale_cufft(self):
     def test_prepostfilt_fwd_scale_cufft_skcuda(self):
         self.fwd_test(True, get_forward_cuFFT, postfact=2.0, preffact=1.5, external=False)
 
+    def test_fwd_not_power_two_noscale_reikna(self):
+        self.fwd_test(False, get_forward_Reikna, size=20, decimal=4)
+
+    def test_fwd_not_power_two_scale_reikna(self):
+        self.fwd_test(True, get_forward_Reikna, size=20, decimal=4)
+
+    def test_prefilt_fwd_not_power_two_noscale_reikna(self):
+        self.fwd_test(False, get_forward_Reikna, preffact=2.0, size=20, decimal=4)
+
+    def test_prefilt_fwd_not_power_two_scale_reikna(self):
+        self.fwd_test(True, get_forward_Reikna, preffact=2.0, size=20, decimal=4)
+
+    def test_postfilt_fwd_not_power_two_noscale_reikna(self):
+        self.fwd_test(False, get_forward_Reikna, postfact=2.0, size=20, decimal=4)
+
+    def test_postfilt_fwd_not_power_two_scale_reikna(self):
+        self.fwd_test(True, get_forward_Reikna, postfact=2.0, size=20, decimal=4)
+
+    def test_prepostfilt_fwd_not_power_two_noscale_reikna(self):
+        self.fwd_test(False, get_forward_Reikna, postfact=2.0, preffact=1.5, size=20, decimal=4)
+
+    def test_prepostfilt_fwd_not_power_two_scale_reikna(self):
+        self.fwd_test(True, get_forward_Reikna, postfact=2.0, preffact=1.5, size=20, decimal=4)
+
+    def test_fwd_not_power_two_noscale_not_squared_reikna(self):
+        self.fwd_test(False, get_forward_Reikna, size=20, squared=False, decimal=4)
+
+    def test_fwd_not_power_two_not_squared_scale_reikna(self):
+        self.fwd_test(True, get_forward_Reikna, size=20, squared=False, decimal=4)
+
+    def test_prefilt_fwd_not_power_two_not_squared_noscale_reikna(self):
+        self.fwd_test(False, get_forward_Reikna, preffact=2.0, size=20, squared=False, decimal=4)
+
+    def test_prefilt_fwd_not_power_two_not_squared_scale_reikna(self):
+        self.fwd_test(True, get_forward_Reikna, preffact=2.0, size=20, squared=False, decimal=4)
+
+    def test_postfilt_fwd_not_power_two_not_squared_noscale_reikna(self):
+        self.fwd_test(False, get_forward_Reikna, postfact=2.0, size=20, squared=False, decimal=4)
+
+    def test_postfilt_fwd_not_power_two_not_squared_scale_reikna(self):
+        self.fwd_test(True, get_forward_Reikna, postfact=2.0, size=20, squared=False, decimal=4)
+
+    def test_prepostfilt_fwd_not_power_two_not_squared_noscale_reikna(self):
+        self.fwd_test(False, get_forward_Reikna, postfact=2.0, preffact=1.5, size=20, squared=False, decimal=4)
+
+    def test_prepostfilt_fwd_not_power_two_not_squared_scale_reikna(self):
+        self.fwd_test(True, get_forward_Reikna, postfact=2.0, preffact=1.5, size=20, squared=False, decimal=4)
 
     ############# Trivial inverse transform tests #########
 
-    def rev_test(self, symmetric, factory, preffact=None, postfact=None, external=True):
-        f = self.get_input()
+    def rev_test(self, symmetric, factory, preffact=None, postfact=None, external=True,
+                 size=32, squared=True, decimal=6):
+        f = self.get_input(size, squared=squared)
         f_d = gpuarray.to_gpu(f)
         if preffact is not None:
             pref = preffact * np.ones(shape=f.shape[-2:], dtype=np.complex64)
@@ -192,8 +243,8 @@ def rev_test(self, symmetric, factory, preffact=None, postfact=None, external=Tr
         elements = f.shape[-2] * f.shape[-1]
         scale = 1.0 if not symmetric else np.sqrt(elements)
         expected = scale * preffact * postfact
-        self.assertAlmostEqual(f_back[0,0,0], expected)
-        np.testing.assert_array_almost_equal(f_back.flat[1:], 0)
+        np.testing.assert_almost_equal(f_back[0,0,0].real, expected, decimal=decimal)
+        np.testing.assert_array_almost_equal(f_back.flat[1:], 0, decimal=decimal)
 
 
     def test_rev_noscale_reikna(self):
@@ -276,6 +327,54 @@ def test_prepostfilt_rev_scale_cufft(self):
     def test_prepostfilt_rev_scale_cufft_skcuda(self):
         self.rev_test(True, get_reverse_cuFFT, postfact=1.5, preffact=2.0, external=False)
 
+    def test_rev_not_power_two_noscale_reikna(self):
+        self.rev_test(False, get_reverse_Reikna, size=20)
+
+    def test_rev_not_power_two_scale_reikna(self):
+        self.rev_test(True, get_reverse_Reikna, size=20, decimal=5)
+
+    def test_prefilt_rev_not_power_two_noscale_reikna(self):
+        self.rev_test(False, get_reverse_Reikna, preffact=1.5, size=20)
+
+    def test_prefilt_rev_not_power_two_scale_reikna(self):
+        self.rev_test(True, get_reverse_Reikna, preffact=1.5, size=20, decimal=5)
+
+    def test_postfilt_rev_not_power_two_noscale_reikna(self):
+        self.rev_test(False, get_reverse_Reikna, postfact=1.5, size=20)
+
+    def test_postfilt_rev_not_power_two_scale_reikna(self):
+        self.rev_test(True, get_reverse_Reikna, postfact=1.5, size=20, decimal=5)
+
+    def test_prepostfilt_rev_not_power_two_noscale_reikna(self):
+        self.rev_test(False, get_reverse_Reikna, postfact=1.5, preffact=2.0, size=20)
+
+    def test_prepostfilt_rev_not_power_two_scale_reikna(self):
+        self.rev_test(True, get_reverse_Reikna, postfact=1.5, preffact=2.0, size=20, decimal=5)
+
+    def test_rev_not_power_two_not_squared_noscale_reikna(self):
+        self.rev_test(False, get_reverse_Reikna, size=20, squared=False)
+
+    def test_rev_not_power_two_not_squared_scale_reikna(self):
+        self.rev_test(True, get_reverse_Reikna, size=20, squared=False, decimal=5)
+
+    def test_prefilt_rev_not_power_two_not_squared_noscale_reikna(self):
+        self.rev_test(False, get_reverse_Reikna, preffact=1.5, size=20, squared=False)
+
+    def test_prefilt_rev_not_power_two_not_squared_scale_reikna(self):
+        self.rev_test(True, get_reverse_Reikna, preffact=1.5, size=20, squared=False, decimal=5)
+
+    def test_postfilt_rev_not_power_two_not_squared_noscale_reikna(self):
+        self.rev_test(False, get_reverse_Reikna, postfact=1.5, size=20, squared=False)
+
+    def test_postfilt_rev_not_power_two_not_squared_scale_reikna(self):
+        self.rev_test(True, get_reverse_Reikna, postfact=1.5, size=20, squared=False, decimal=5)
+
+    def test_prepostfilt_rev_not_power_two_not_squared_noscale_reikna(self):
+        self.rev_test(False, get_reverse_Reikna, postfact=1.5, preffact=2.0, size=20, squared=False)
+
+    def test_prepostfilt_rev_not_power_two_not_squared_scale_reikna(self):
+        self.rev_test(True, get_reverse_Reikna, postfact=1.5, preffact=2.0, size=20, squared=False, decimal=5)
+
 
 if __name__ == '__main__':
     unittest.main()