diff --git a/dpnp/tests/third_party/cupy/fft_tests/test_cache.py b/dpnp/tests/third_party/cupy/fft_tests/test_cache.py new file mode 100644 index 00000000000..dc2a82a2385 --- /dev/null +++ b/dpnp/tests/third_party/cupy/fft_tests/test_cache.py @@ -0,0 +1,510 @@ +import contextlib +import io +import queue +import threading +import unittest + +import pytest + +import dpnp as cupy +from dpnp.tests.third_party.cupy import testing + +# from cupy.cuda import cufft +# from cupy.cuda import device +# from cupy.cuda import runtime +# from cupy.fft import config + +# from .test_fft import (multi_gpu_config, _skip_multi_gpu_bug) + +pytest.skip("FFT cache functions are not supported", allow_module_level=True) + + +def intercept_stdout(func): + with io.StringIO() as buf, contextlib.redirect_stdout(buf): + func() + stdout = buf.getvalue() + return stdout + + +n_devices = runtime.getDeviceCount() + + +class TestPlanCache(unittest.TestCase): + def setUp(self): + self.caches = [] + self.old_sizes = [] + for i in range(n_devices): + with device.Device(i): + cache = config.get_plan_cache() + self.old_sizes.append(cache.get_size()) + cache.clear() + cache.set_memsize(-1) + cache.set_size(2) + self.caches.append(cache) + + def tearDown(self): + for i in range(n_devices): + with device.Device(i): + cache = config.get_plan_cache() + cache.clear() + cache.set_size(self.old_sizes[i]) + cache.set_memsize(-1) + + def test_LRU_cache1(self): + # test if insertion and clean-up works + cache = config.get_plan_cache() + assert cache.get_curr_size() == 0 <= cache.get_size() + + a = testing.shaped_random((10,), cupy, cupy.float32) + cupy.fft.fft(a) + assert cache.get_curr_size() == 1 <= cache.get_size() + + cache.clear() + assert cache.get_curr_size() == 0 <= cache.get_size() + + def test_LRU_cache2(self): + # test if plan is reused + cache = config.get_plan_cache() + assert cache.get_curr_size() == 0 <= cache.get_size() + + # run once and fetch the cached plan + a = testing.shaped_random((10,), 
cupy, cupy.float32) + cupy.fft.fft(a) + assert cache.get_curr_size() == 1 <= cache.get_size() + iterator = iter(cache) + plan0 = next(iterator)[1].plan + + # repeat + cupy.fft.fft(a) + assert cache.get_curr_size() == 1 <= cache.get_size() + iterator = iter(cache) + plan1 = next(iterator)[1].plan + + # we should get the same plan + assert plan0 is plan1 + + def test_LRU_cache3(self): + # test if cache size is limited + cache = config.get_plan_cache() + assert cache.get_curr_size() == 0 <= cache.get_size() + + # run once and fetch the cached plan + a = testing.shaped_random((10,), cupy, cupy.float32) + cupy.fft.fft(a) + assert cache.get_curr_size() == 1 <= cache.get_size() + iterator = iter(cache) + plan = next(iterator)[1].plan + + # run another two FFTs with different sizes so that the first + # plan is discarded from the cache + a = testing.shaped_random((20,), cupy, cupy.float32) + cupy.fft.fft(a) + assert cache.get_curr_size() == 2 <= cache.get_size() + a = testing.shaped_random((30,), cupy, cupy.float32) + cupy.fft.fft(a) + assert cache.get_curr_size() == 2 <= cache.get_size() + + # check if the first plan is indeed not cached + for _, node in cache: + assert plan is not node.plan + + def test_LRU_cache4(self): + # test if fetching the plan will reorder it to the top + cache = config.get_plan_cache() + assert cache.get_curr_size() == 0 <= cache.get_size() + + # this creates a Plan1d + a = testing.shaped_random((10,), cupy, cupy.float32) + cupy.fft.fft(a) + assert cache.get_curr_size() == 1 <= cache.get_size() + + # this creates a PlanNd + a = testing.shaped_random((10, 20), cupy, cupy.float32) + cupy.fft.fftn(a) + assert cache.get_curr_size() == 2 <= cache.get_size() + + # The first in the cache is the most recently used one; + # using an iterator to access the linked list guarantees that + # we don't alter the cache order + iterator = iter(cache) + assert isinstance(next(iterator)[1].plan, cufft.PlanNd) + assert isinstance(next(iterator)[1].plan, cufft.Plan1d) 
+ with pytest.raises(StopIteration): + next(iterator) + + # this brings Plan1d to the top + a = testing.shaped_random((10,), cupy, cupy.float32) + cupy.fft.fft(a) + assert cache.get_curr_size() == 2 <= cache.get_size() + iterator = iter(cache) + assert isinstance(next(iterator)[1].plan, cufft.Plan1d) + assert isinstance(next(iterator)[1].plan, cufft.PlanNd) + with pytest.raises(StopIteration): + next(iterator) + + # An LRU cache guarantees that such a silly operation never + # raises StopIteration + iterator = iter(cache) + for i in range(100): + cache[next(iterator)[0]] + + @testing.multi_gpu(2) + def test_LRU_cache5(self): + # test if the LRU cache is thread-local + + def init_caches(gpus): + for i in gpus: + with device.Device(i): + config.get_plan_cache() + + # Testing in the current thread: in setUp() we ensure all caches + # are initialized + stdout = intercept_stdout(config.show_plan_cache_info) + assert "uninitialized" not in stdout + + def thread_show_plan_cache_info(queue): + # allow output from another thread to be accessed by the + # main thread + cupy.cuda.Device().use() + stdout = intercept_stdout(config.show_plan_cache_info) + queue.put(stdout) + + # When starting a new thread, the cache is uninitialized there + # (for both devices) + q = queue.Queue() + thread = threading.Thread(target=thread_show_plan_cache_info, args=(q,)) + thread.start() + thread.join() + stdout = q.get() + assert stdout.count("uninitialized") == n_devices + + def thread_init_caches(gpus, queue): + cupy.cuda.Device().use() + init_caches(gpus) + thread_show_plan_cache_info(queue) + + # Now let's try initializing device 0 on another thread + thread = threading.Thread( + target=thread_init_caches, + args=( + [0], + q, + ), + ) + thread.start() + thread.join() + stdout = q.get() + assert stdout.count("uninitialized") == n_devices - 1 + + # ...and this time both devices + thread = threading.Thread( + target=thread_init_caches, + args=( + [0, 1], + q, + ), + ) + thread.start() + 
thread.join() + stdout = q.get() + assert stdout.count("uninitialized") == n_devices - 2 + + @testing.multi_gpu(2) + def test_LRU_cache6(self): + # test if each device has a separate cache + cache0 = self.caches[0] + cache1 = self.caches[1] + + # ensure a fresh state + assert cache0.get_curr_size() == 0 <= cache0.get_size() + assert cache1.get_curr_size() == 0 <= cache1.get_size() + + # do some computation on GPU 0 + with device.Device(0): + a = testing.shaped_random((10,), cupy, cupy.float32) + cupy.fft.fft(a) + assert cache0.get_curr_size() == 1 <= cache0.get_size() + assert cache1.get_curr_size() == 0 <= cache1.get_size() + + # do some computation on GPU 1 + with device.Device(1): + c = testing.shaped_random((16,), cupy, cupy.float64) + cupy.fft.fft(c) + assert cache0.get_curr_size() == 1 <= cache0.get_size() + assert cache1.get_curr_size() == 1 <= cache1.get_size() + + # reset device 0 + cache0.clear() + assert cache0.get_curr_size() == 0 <= cache0.get_size() + assert cache1.get_curr_size() == 1 <= cache1.get_size() + + # reset device 1 + cache1.clear() + assert cache0.get_curr_size() == 0 <= cache0.get_size() + assert cache1.get_curr_size() == 0 <= cache1.get_size() + + @testing.multi_gpu(2) + @pytest.mark.skipif( + runtime.is_hip, reason="hipFFT doesn't support multi-GPU" + ) + def test_LRU_cache7(self): + # test accessing a multi-GPU plan + cache0 = self.caches[0] + cache1 = self.caches[1] + + # ensure a fresh state + assert cache0.get_curr_size() == 0 <= cache0.get_size() + assert cache1.get_curr_size() == 0 <= cache1.get_size() + + # do some computation on GPU 0 + with device.Device(0): + a = testing.shaped_random((10,), cupy, cupy.float32) + cupy.fft.fft(a) + assert cache0.get_curr_size() == 1 <= cache0.get_size() + assert cache1.get_curr_size() == 0 <= cache1.get_size() + + # do a multi-GPU FFT + config.use_multi_gpus = True + config.set_cufft_gpus([0, 1]) + c = testing.shaped_random((128,), cupy, cupy.complex64) + cupy.fft.fft(c) + assert 
cache0.get_curr_size() == 2 <= cache0.get_size() + assert cache1.get_curr_size() == 1 <= cache1.get_size() + + # check both devices' caches see the same multi-GPU plan + plan0 = next(iter(cache0))[1].plan + plan1 = next(iter(cache1))[1].plan + assert plan0 is plan1 + + # reset + config.use_multi_gpus = False + config._device = None + + # do some computation on GPU 1 + with device.Device(1): + e = testing.shaped_random((20,), cupy, cupy.complex128) + cupy.fft.fft(e) + assert cache0.get_curr_size() == 2 <= cache0.get_size() + assert cache1.get_curr_size() == 2 <= cache1.get_size() + + # by this time, the multi-GPU plan remains the most recently + # used one on GPU 0, but not on GPU 1 + assert plan0 is next(iter(cache0))[1].plan + assert plan1 is not next(iter(cache1))[1].plan + + # now use it again to make it the most recent + config.use_multi_gpus = True + config.set_cufft_gpus([0, 1]) + c = testing.shaped_random((128,), cupy, cupy.complex64) + cupy.fft.fft(c) + assert cache0.get_curr_size() == 2 <= cache0.get_size() + assert cache1.get_curr_size() == 2 <= cache1.get_size() + assert plan0 is next(iter(cache0))[1].plan + assert plan1 is next(iter(cache1))[1].plan + # reset + config.use_multi_gpus = False + config._device = None + + # Do 2 more different FFTs on one of the devices, and the + # multi-GPU plan would be discarded from both caches + with device.Device(1): + x = testing.shaped_random((30,), cupy, cupy.complex128) + cupy.fft.fft(x) + y = testing.shaped_random((40, 40), cupy, cupy.complex64) + cupy.fft.fftn(y) + for _, node in cache0: + assert plan0 is not node.plan + for _, node in cache1: + assert plan1 is not node.plan + assert cache0.get_curr_size() == 1 <= cache0.get_size() + assert cache1.get_curr_size() == 2 <= cache1.get_size() + + def test_LRU_cache8(self): + # test if Plan1d and PlanNd can coexist in the same cache + cache = config.get_plan_cache() + assert cache.get_curr_size() == 0 <= cache.get_size() + + # do a 1D FFT + a = 
testing.shaped_random((10,), cupy, cupy.float32) + cupy.fft.fft(a) + assert cache.get_curr_size() == 1 <= cache.get_size() + assert isinstance(next(iter(cache))[1].plan, cufft.Plan1d) + + # then a 3D FFT + a = testing.shaped_random((8, 8, 8), cupy, cupy.complex128) + cupy.fft.fftn(a) + assert cache.get_curr_size() == 2 <= cache.get_size() + iterator = iter(cache) + + # the cached order is 1. PlanNd, 2. Plan1d + assert isinstance(next(iterator)[1].plan, cufft.PlanNd) + assert isinstance(next(iterator)[1].plan, cufft.Plan1d) + + def test_LRU_cache9(self): + # test if memsizes in the cache adds up + cache = config.get_plan_cache() + assert cache.get_curr_size() == 0 <= cache.get_size() + + memsize = 0 + a = testing.shaped_random((10,), cupy, cupy.float32) + cupy.fft.fft(a) + assert cache.get_curr_size() == 1 <= cache.get_size() + memsize += next(iter(cache))[1].plan.work_area.mem.size + + a = testing.shaped_random((48,), cupy, cupy.complex64) + cupy.fft.fft(a) + assert cache.get_curr_size() == 2 <= cache.get_size() + memsize += next(iter(cache))[1].plan.work_area.mem.size + + assert memsize == cache.get_curr_memsize() + + def test_LRU_cache10(self): + # test if deletion works and if show_info() is consistent with data + cache = config.get_plan_cache() + assert cache.get_curr_size() == 0 <= cache.get_size() + + curr_size = 0 + size = 2 + curr_memsize = 0 + memsize = "(unlimited)" # default + + a = testing.shaped_random((16, 16), cupy, cupy.float32) + cupy.fft.fft2(a) + assert cache.get_curr_size() == 1 <= cache.get_size() + node1 = next(iter(cache))[1] + curr_size += 1 + curr_memsize += node1.plan.work_area.mem.size + stdout = intercept_stdout(cache.show_info) + assert "{0} / {1} (counts)".format(curr_size, size) in stdout + assert "{0} / {1} (bytes)".format(curr_memsize, memsize) in stdout + assert str(node1) in stdout + + a = testing.shaped_random((1024,), cupy, cupy.complex64) + cupy.fft.ifft(a) + assert cache.get_curr_size() == 2 <= cache.get_size() + node2 = 
next(iter(cache))[1] + curr_size += 1 + curr_memsize += node2.plan.work_area.mem.size + stdout = intercept_stdout(cache.show_info) + assert "{0} / {1} (counts)".format(curr_size, size) in stdout + assert "{0} / {1} (bytes)".format(curr_memsize, memsize) in stdout + assert str(node2) + "\n" + str(node1) in stdout + + # test deletion + key = node2.key + del cache[key] + assert cache.get_curr_size() == 1 <= cache.get_size() + curr_size -= 1 + curr_memsize -= node2.plan.work_area.mem.size + stdout = intercept_stdout(cache.show_info) + assert "{0} / {1} (counts)".format(curr_size, size) in stdout + assert "{0} / {1} (bytes)".format(curr_memsize, memsize) in stdout + assert str(node2) not in stdout + + @multi_gpu_config(gpu_configs=[[0, 1], [1, 0]]) + @testing.multi_gpu(2) + @pytest.mark.skipif( + runtime.is_hip, reason="hipFFT doesn't support multi-GPU" + ) + def test_LRU_cache11(self): + # test if collectively deleting a multi-GPU plan works + _skip_multi_gpu_bug((128,), self.gpus) + cache0 = self.caches[0] + cache1 = self.caches[1] + + # ensure a fresh state + assert cache0.get_curr_size() == 0 <= cache0.get_size() + assert cache1.get_curr_size() == 0 <= cache1.get_size() + + # do a multi-GPU FFT + c = testing.shaped_random((128,), cupy, cupy.complex64) + cupy.fft.fft(c) + assert cache0.get_curr_size() == 1 <= cache0.get_size() + assert cache1.get_curr_size() == 1 <= cache1.get_size() + + node0 = next(iter(cache0))[1] + node1 = next(iter(cache1))[1] + assert node0.key == node1.key + assert node0.plan is node1.plan + assert cache0.get_curr_memsize() == node0.memsize > 0 + assert cache1.get_curr_memsize() == node1.memsize > 0 + + # delete + del cache0[node0.key] + assert cache0.get_curr_size() == 0 <= cache0.get_size() + assert cache1.get_curr_size() == 0 <= cache1.get_size() + assert cache0.get_curr_memsize() == 0 + assert cache1.get_curr_memsize() == 0 + + @multi_gpu_config(gpu_configs=[[0, 1], [1, 0]]) + @testing.multi_gpu(2) + @pytest.mark.skipif( + runtime.is_hip, 
reason="hipFFT doesn't support multi-GPU" + ) + def test_LRU_cache12(self): + # test if an error is raised when one of the caches is unable + # to fit in a multi-GPU plan + cache0 = self.caches[0] + cache1 = self.caches[1] + + # ensure a fresh state + assert cache0.get_curr_size() == 0 <= cache0.get_size() + assert cache1.get_curr_size() == 0 <= cache1.get_size() + + # make it impossible to cache + cache1.set_memsize(1) + + # do a multi-GPU FFT + with pytest.raises(RuntimeError) as e: + c = testing.shaped_random((128,), cupy, cupy.complex64) + cupy.fft.fft(c) + assert "plan memsize is too large for device 1" in str(e.value) + assert cache0.get_curr_size() == 0 <= cache0.get_size() + assert cache1.get_curr_size() == 0 <= cache1.get_size() + + @unittest.skipIf(runtime.is_hip, "rocFFT has different plan sizes") + @unittest.skipIf( + runtime.runtimeGetVersion() >= 11080, + "CUDA 11.8 has different plan size", + ) + def test_LRU_cache13(self): + # test if plan insertion respects the memory size limit + cache = config.get_plan_cache() + cache.set_memsize(1024) + + # ensure a fresh state + assert cache.get_curr_size() == 0 <= cache.get_size() + + # On CUDA 10.0 + sm75, this generates a plan of size 1024 bytes + a = testing.shaped_random((128,), cupy, cupy.complex64) + cupy.fft.ifft(a) + assert cache.get_curr_size() == 1 <= cache.get_size() + assert cache.get_curr_memsize() == 1024 == cache.get_memsize() + + # a second plan (of same size) is generated, but the cache is full, + # so the first plan is evicted + a = testing.shaped_random((64,), cupy, cupy.complex128) + cupy.fft.ifft(a) + assert cache.get_curr_size() == 1 <= cache.get_size() + assert cache.get_curr_memsize() == 1024 == cache.get_memsize() + plan = next(iter(cache))[1].plan + + # this plan is twice as large, so won't fit in + a = testing.shaped_random((128,), cupy, cupy.complex128) + with pytest.raises(RuntimeError) as e: + cupy.fft.ifft(a) + assert "memsize is too large" in str(e.value) + # the cache remains 
intact + assert cache.get_curr_size() == 1 <= cache.get_size() + assert cache.get_curr_memsize() == 1024 == cache.get_memsize() + plan1 = next(iter(cache))[1].plan + assert plan1 is plan + + # doubling the cache size would make the plan just fit (and evict + # the existing one) + cache.set_memsize(2048) + cupy.fft.ifft(a) + assert cache.get_curr_size() == 1 <= cache.get_size() + assert cache.get_curr_memsize() == 2048 == cache.get_memsize() + plan2 = next(iter(cache))[1].plan + assert plan2 is not plan diff --git a/dpnp/tests/third_party/cupy/fft_tests/test_callback.py b/dpnp/tests/third_party/cupy/fft_tests/test_callback.py new file mode 100644 index 00000000000..56c06c16f74 --- /dev/null +++ b/dpnp/tests/third_party/cupy/fft_tests/test_callback.py @@ -0,0 +1,831 @@ +import contextlib +import string +import sys +import tempfile +from unittest import mock + +import numpy as np +import pytest + +import dpnp as cupy +from dpnp.tests.third_party.cupy import testing + +pytest.skip("FFT callbacks are not supported", allow_module_level=True) + + +@contextlib.contextmanager +def use_temporary_cache_dir(): + target = "cupy.fft._callback.get_cache_dir" + with tempfile.TemporaryDirectory() as path: + with mock.patch(target, lambda: path): + yield path + + +_load_callback = r""" +__device__ ${data_type} CB_ConvertInput( + void* dataIn, size_t offset, void* callerInfo, void* sharedPtr) +{ + ${data_type} x = ((${data_type}*)dataIn)[offset]; + ${element} *= 2.5; + return x; +} + +__device__ ${load_type} d_loadCallbackPtr = CB_ConvertInput; +""" + +_load_callback_with_aux = r""" +__device__ ${data_type} CB_ConvertInput( + void* dataIn, size_t offset, void* callerInfo, void* sharedPtr) +{ + ${data_type} x = ((${data_type}*)dataIn)[offset]; + ${element} *= *((${aux_type}*)callerInfo); + return x; +} + +__device__ ${load_type} d_loadCallbackPtr = CB_ConvertInput; +""" + +_load_callback_with_aux2 = r""" +__device__ ${data_type} CB_ConvertInput( + void* dataIn, size_t offset, void* callerInfo, void* 
sharedPtr) +{ + ${data_type} x = ((${data_type}*)dataIn)[offset]; + ${element} *= ((${aux_type}*)callerInfo)[offset]; + return x; +} + +__device__ ${load_type} d_loadCallbackPtr = CB_ConvertInput; +""" + +_store_callback = r""" +__device__ void CB_ConvertOutput( + void *dataOut, size_t offset, ${data_type} element, + void *callerInfo, void *sharedPointer) +{ + ${data_type} x = element; + ${element} /= 3.8; + ((${data_type}*)dataOut)[offset] = x; +} + +__device__ ${store_type} d_storeCallbackPtr = CB_ConvertOutput; +""" + +_store_callback_with_aux = r""" +__device__ void CB_ConvertOutput( + void *dataOut, size_t offset, ${data_type} element, + void *callerInfo, void *sharedPointer) +{ + ${data_type} x = element; + ${element} /= *((${aux_type}*)callerInfo); + ((${data_type}*)dataOut)[offset] = x; +} + +__device__ ${store_type} d_storeCallbackPtr = CB_ConvertOutput; +""" + + +def _set_load_cb(code, element, data_type, callback_type, aux_type=None): + return string.Template(code).substitute( + data_type=data_type, + aux_type=aux_type, + load_type=callback_type, + element=element, + ) + + +def _set_store_cb(code, element, data_type, callback_type, aux_type=None): + return string.Template(code).substitute( + data_type=data_type, + aux_type=aux_type, + store_type=callback_type, + element=element, + ) + + +@testing.parameterize( + *testing.product( + { + "n": [None, 5, 10, 15], + "shape": [(10, 7), (10,), (10, 10)], + "norm": [None, "ortho"], + } + ) +) +@testing.with_requires("cython>=0.29.0") +@pytest.mark.skipif( + not sys.platform.startswith("linux"), + reason="callbacks are only supported on Linux", +) +@pytest.mark.skipif( + cupy.cuda.runtime.is_hip, reason="hipFFT does not support callbacks" +) +class Test1dCallbacks: + + def _test_load_helper(self, xp, dtype, fft_func): + fft = getattr(xp.fft, fft_func) + code = _load_callback + if dtype == np.complex64: + types = ("x.x", "cufftComplex", "cufftCallbackLoadC") + elif dtype == np.complex128: + types = ("x.x", 
"cufftDoubleComplex", "cufftCallbackLoadZ") + elif dtype == np.float32: + types = ("x", "cufftReal", "cufftCallbackLoadR") + else: + types = ("x", "cufftDoubleReal", "cufftCallbackLoadD") + cb_load = _set_load_cb(code, *types) + + a = testing.shaped_random(self.shape, xp, dtype) + if xp is np: + a.real *= 2.5 + out = fft(a, n=self.n, norm=self.norm) + if dtype in (np.float32, np.complex64): + if fft_func != "irfft": + out = out.astype(np.complex64) + else: + out = out.astype(np.float32) + else: + with use_temporary_cache_dir(): + with xp.fft.config.set_cufft_callbacks(cb_load=cb_load): + out = fft(a, n=self.n, norm=self.norm) + + return out + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) + def test_fft_load(self, xp, dtype): + return self._test_load_helper(xp, dtype, "fft") + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) + def test_ifft_load(self, xp, dtype): + return self._test_load_helper(xp, dtype, "ifft") + + @testing.for_float_dtypes(no_float16=True) + @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) + def test_rfft_load(self, xp, dtype): + return self._test_load_helper(xp, dtype, "rfft") + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) + def test_irfft_load(self, xp, dtype): + return self._test_load_helper(xp, dtype, "irfft") + + def _test_store_helper(self, xp, dtype, fft_func): + fft = getattr(xp.fft, fft_func) + code = _store_callback + if dtype == np.complex64: + if fft_func != "irfft": + types = ("x.y", "cufftComplex", "cufftCallbackStoreC") + else: + types = ("x", "cufftReal", "cufftCallbackStoreR") + elif dtype == np.complex128: + if fft_func != "irfft": + types = ("x.y", "cufftDoubleComplex", "cufftCallbackStoreZ") + else: + types = ("x", "cufftDoubleReal", "cufftCallbackStoreD") + elif dtype == np.float32: + types = ("x.y", "cufftComplex", 
"cufftCallbackStoreC") + elif dtype == np.float64: + types = ("x.y", "cufftDoubleComplex", "cufftCallbackStoreZ") + cb_store = _set_store_cb(code, *types) + + a = testing.shaped_random(self.shape, xp, dtype) + if xp is np: + out = fft(a, n=self.n, norm=self.norm) + if fft_func != "irfft": + out.imag /= 3.8 + if dtype in (np.float32, np.complex64): + out = out.astype(np.complex64) + else: + out /= 3.8 + if dtype in (np.float32, np.complex64): + out = out.astype(np.float32) + else: + with use_temporary_cache_dir(): + with xp.fft.config.set_cufft_callbacks(cb_store=cb_store): + out = fft(a, n=self.n, norm=self.norm) + + return out + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) + def test_fft_store(self, xp, dtype): + return self._test_store_helper(xp, dtype, "fft") + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) + def test_ifft_store(self, xp, dtype): + return self._test_store_helper(xp, dtype, "ifft") + + @testing.for_float_dtypes(no_float16=True) + @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) + def test_rfft_store(self, xp, dtype): + return self._test_store_helper(xp, dtype, "rfft") + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) + def test_irfft_store(self, xp, dtype): + return self._test_store_helper(xp, dtype, "irfft") + + def _test_load_store_helper(self, xp, dtype, fft_func): + fft = getattr(xp.fft, fft_func) + load_code = _load_callback + store_code = _store_callback + if fft_func in ("fft", "ifft"): + if dtype == np.complex64: + load_types = ("x.x", "cufftComplex", "cufftCallbackLoadC") + store_types = ("x.y", "cufftComplex", "cufftCallbackStoreC") + else: + load_types = ("x.x", "cufftDoubleComplex", "cufftCallbackLoadZ") + store_types = ( + "x.y", + "cufftDoubleComplex", + "cufftCallbackStoreZ", + ) + elif fft_func == "rfft": + if dtype == 
np.float32: + load_types = ("x", "cufftReal", "cufftCallbackLoadR") + store_types = ("x.y", "cufftComplex", "cufftCallbackStoreC") + else: + load_types = ("x", "cufftDoubleReal", "cufftCallbackLoadD") + store_types = ( + "x.y", + "cufftDoubleComplex", + "cufftCallbackStoreZ", + ) + else: # irfft + if dtype == np.complex64: + load_types = ("x.x", "cufftComplex", "cufftCallbackLoadC") + store_types = ("x", "cufftReal", "cufftCallbackStoreR") + else: + load_types = ("x.x", "cufftDoubleComplex", "cufftCallbackLoadZ") + store_types = ("x", "cufftDoubleReal", "cufftCallbackStoreD") + cb_load = _set_load_cb(load_code, *load_types) + cb_store = _set_store_cb(store_code, *store_types) + + a = testing.shaped_random(self.shape, xp, dtype) + if xp is np: + a.real *= 2.5 + out = fft(a, n=self.n, norm=self.norm) + if fft_func != "irfft": + out.imag /= 3.8 + if dtype in (np.float32, np.complex64): + out = out.astype(np.complex64) + else: + out /= 3.8 + if dtype in (np.float32, np.complex64): + out = out.astype(np.float32) + else: + with use_temporary_cache_dir(): + with xp.fft.config.set_cufft_callbacks( + cb_load=cb_load, cb_store=cb_store + ): + out = fft(a, n=self.n, norm=self.norm) + + return out + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) + def test_fft_load_store(self, xp, dtype): + return self._test_load_store_helper(xp, dtype, "fft") + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) + def test_ifft_load_store(self, xp, dtype): + return self._test_load_store_helper(xp, dtype, "ifft") + + @testing.for_float_dtypes(no_float16=True) + @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) + def test_rfft_load_store(self, xp, dtype): + return self._test_load_store_helper(xp, dtype, "rfft") + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) + def test_irfft_load_store(self, 
xp, dtype): + return self._test_load_store_helper(xp, dtype, "irfft") + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) + def test_fft_load_aux(self, xp, dtype): + fft = xp.fft.fft + c = _load_callback_with_aux2 + if dtype == np.complex64: + cb_load = _set_load_cb( + c, "x.x", "cufftComplex", "cufftCallbackLoadC", "float" + ) + else: + cb_load = _set_load_cb( + c, "x.x", "cufftDoubleComplex", "cufftCallbackLoadZ", "double" + ) + + a = testing.shaped_random(self.shape, xp, dtype) + out_last = self.n if self.n is not None else self.shape[-1] + out_shape = list(self.shape) + out_shape[-1] = out_last + last_min = min(self.shape[-1], out_last) + b = xp.arange(np.prod(out_shape), dtype=xp.dtype(dtype).char.lower()) + b = b.reshape(out_shape) + if xp is np: + x = np.zeros(out_shape, dtype=dtype) + x[..., 0:last_min] = a[..., 0:last_min] + x.real *= b + out = fft(x, n=self.n, norm=self.norm) + if dtype in (np.float32, np.complex64): + out = out.astype(np.complex64) + else: + with use_temporary_cache_dir(): + with xp.fft.config.set_cufft_callbacks( + cb_load=cb_load, cb_load_aux_arr=b + ): + out = fft(a, n=self.n, norm=self.norm) + + return out + + def _test_load_store_aux_helper(self, xp, dtype, fft_func): + fft = getattr(xp.fft, fft_func) + load_code = _load_callback_with_aux + store_code = _store_callback_with_aux + if xp is cupy: + load_aux = xp.asarray(2.5, dtype=xp.dtype(dtype).char.lower()) + store_aux = xp.asarray(3.8, dtype=xp.dtype(dtype).char.lower()) + + if fft_func in ("fft", "ifft"): + if dtype == np.complex64: + load_types = ( + "x.x", + "cufftComplex", + "cufftCallbackLoadC", + "float", + ) + store_types = ( + "x.y", + "cufftComplex", + "cufftCallbackStoreC", + "float", + ) + else: + load_types = ( + "x.x", + "cufftDoubleComplex", + "cufftCallbackLoadZ", + "double", + ) + store_types = ( + "x.y", + "cufftDoubleComplex", + "cufftCallbackStoreZ", + "double", + ) + elif fft_func == "rfft": + if dtype 
== np.float32: + load_types = ("x", "cufftReal", "cufftCallbackLoadR", "float") + store_types = ( + "x.y", + "cufftComplex", + "cufftCallbackStoreC", + "float", + ) + else: + load_types = ( + "x", + "cufftDoubleReal", + "cufftCallbackLoadD", + "double", + ) + store_types = ( + "x.y", + "cufftDoubleComplex", + "cufftCallbackStoreZ", + "double", + ) + else: # irfft + if dtype == np.complex64: + load_types = ( + "x.x", + "cufftComplex", + "cufftCallbackLoadC", + "float", + ) + store_types = ("x", "cufftReal", "cufftCallbackStoreR", "float") + else: + load_types = ( + "x.x", + "cufftDoubleComplex", + "cufftCallbackLoadZ", + "double", + ) + store_types = ( + "x", + "cufftDoubleReal", + "cufftCallbackStoreD", + "double", + ) + cb_load = _set_load_cb(load_code, *load_types) + cb_store = _set_store_cb(store_code, *store_types) + + a = testing.shaped_random(self.shape, xp, dtype) + if xp is np: + a.real *= 2.5 + out = fft(a, n=self.n, norm=self.norm) + if fft_func != "irfft": + out.imag /= 3.8 + if dtype in (np.float32, np.complex64): + out = out.astype(np.complex64) + else: + out /= 3.8 + if dtype in (np.float32, np.complex64): + out = out.astype(np.float32) + else: + with use_temporary_cache_dir(): + with xp.fft.config.set_cufft_callbacks( + cb_load=cb_load, + cb_store=cb_store, + cb_load_aux_arr=load_aux, + cb_store_aux_arr=store_aux, + ): + out = fft(a, n=self.n, norm=self.norm) + + return out + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) + def test_fft_load_store_aux(self, xp, dtype): + return self._test_load_store_aux_helper(xp, dtype, "fft") + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) + def test_ifft_load_store_aux(self, xp, dtype): + return self._test_load_store_aux_helper(xp, dtype, "ifft") + + @testing.for_float_dtypes(no_float16=True) + @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) + def 
test_rfft_load_store_aux(self, xp, dtype): + return self._test_load_store_aux_helper(xp, dtype, "rfft") + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) + def test_irfft_load_store_aux(self, xp, dtype): + return self._test_load_store_aux_helper(xp, dtype, "irfft") + + +@testing.parameterize( + {"shape": (3, 4), "s": None, "axes": None, "norm": None}, + {"shape": (3, 4), "s": (1, 5), "axes": (-2, -1), "norm": None}, + {"shape": (3, 4), "s": None, "axes": (-2, -1), "norm": None}, + {"shape": (3, 4), "s": None, "axes": None, "norm": "ortho"}, + {"shape": (2, 3, 4), "s": None, "axes": None, "norm": None}, + {"shape": (2, 3, 4), "s": (1, 4, 10), "axes": (-3, -2, -1), "norm": None}, + {"shape": (2, 3, 4), "s": None, "axes": (-3, -2, -1), "norm": None}, + {"shape": (2, 3, 4), "s": None, "axes": None, "norm": "ortho"}, + {"shape": (2, 3, 4), "s": (2, 3), "axes": (0, 1, 2), "norm": "ortho"}, +) +@testing.with_requires("cython>=0.29.0") +@pytest.mark.skipif( + not sys.platform.startswith("linux"), + reason="callbacks are only supported on Linux", +) +@pytest.mark.skipif( + cupy.cuda.runtime.is_hip, reason="hipFFT does not support callbacks" +) +class TestNdCallbacks: + + def _test_load_helper(self, xp, dtype, fft_func): + fft = getattr(xp.fft, fft_func) + load_code = _load_callback + if dtype == np.complex64: + types = ("x.x", "cufftComplex", "cufftCallbackLoadC") + elif dtype == np.complex128: + types = ("x.x", "cufftDoubleComplex", "cufftCallbackLoadZ") + elif dtype == np.float32: + types = ("x", "cufftReal", "cufftCallbackLoadR") + else: + types = ("x", "cufftDoubleReal", "cufftCallbackLoadD") + cb_load = _set_load_cb(load_code, *types) + + a = testing.shaped_random(self.shape, xp, dtype) + if xp is np: + a.real *= 2.5 + out = fft(a, s=self.s, axes=self.axes, norm=self.norm) + if dtype in (np.float32, np.complex64): + if fft_func != "irfftn": + out = out.astype(np.complex64) + else: + out = 
out.astype(np.float32) + else: + with use_temporary_cache_dir(): + with xp.fft.config.set_cufft_callbacks(cb_load=cb_load): + out = fft(a, s=self.s, axes=self.axes, norm=self.norm) + + return out + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose( + rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_fftn_load(self, xp, dtype): + return self._test_load_helper(xp, dtype, "fftn") + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose( + rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_ifftn_load(self, xp, dtype): + return self._test_load_helper(xp, dtype, "ifftn") + + @testing.for_float_dtypes(no_float16=True) + @testing.numpy_cupy_allclose( + rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_rfftn_load(self, xp, dtype): + return self._test_load_helper(xp, dtype, "rfftn") + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose( + rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_irfftn_load(self, xp, dtype): + return self._test_load_helper(xp, dtype, "irfftn") + + def _test_store_helper(self, xp, dtype, fft_func): + fft = getattr(xp.fft, fft_func) + store_code = _store_callback + if dtype == np.complex64: + if fft_func != "irfftn": + types = ("x.y", "cufftComplex", "cufftCallbackStoreC") + else: + types = ("x", "cufftReal", "cufftCallbackStoreR") + elif dtype == np.complex128: + if fft_func != "irfftn": + types = ("x.y", "cufftDoubleComplex", "cufftCallbackStoreZ") + else: + types = ("x", "cufftDoubleReal", "cufftCallbackStoreD") + elif dtype == np.float32: + types = ("x.y", "cufftComplex", "cufftCallbackStoreC") + elif dtype == np.float64: + types = ("x.y", "cufftDoubleComplex", "cufftCallbackStoreZ") + cb_store = _set_store_cb(store_code, *types) + + a = testing.shaped_random(self.shape, xp, dtype) + if xp is np: + out = fft(a, s=self.s, axes=self.axes, norm=self.norm) + if fft_func != "irfftn": 
+ out.imag /= 3.8 + if dtype in (np.float32, np.complex64): + out = out.astype(np.complex64) + else: + out /= 3.8 + if dtype in (np.float32, np.complex64): + out = out.astype(np.float32) + else: + with use_temporary_cache_dir(): + with xp.fft.config.set_cufft_callbacks(cb_store=cb_store): + out = fft(a, s=self.s, axes=self.axes, norm=self.norm) + + return out + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose( + rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_fftn_store(self, xp, dtype): + return self._test_store_helper(xp, dtype, "fftn") + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose( + rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_ifftn_store(self, xp, dtype): + return self._test_store_helper(xp, dtype, "ifftn") + + @testing.for_float_dtypes(no_float16=True) + @testing.numpy_cupy_allclose( + rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_rfftn_store(self, xp, dtype): + return self._test_store_helper(xp, dtype, "rfftn") + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose( + rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_irfftn_store(self, xp, dtype): + return self._test_store_helper(xp, dtype, "irfftn") + + def _test_load_store_helper(self, xp, dtype, fft_func): + fft = getattr(xp.fft, fft_func) + load_code = _load_callback + store_code = _store_callback + if fft_func in ("fftn", "ifftn"): + if dtype == np.complex64: + load_types = ("x.x", "cufftComplex", "cufftCallbackLoadC") + store_types = ("x.y", "cufftComplex", "cufftCallbackStoreC") + else: + load_types = ("x.x", "cufftDoubleComplex", "cufftCallbackLoadZ") + store_types = ( + "x.y", + "cufftDoubleComplex", + "cufftCallbackStoreZ", + ) + elif fft_func == "rfftn": + if dtype == np.float32: + load_types = ("x", "cufftReal", "cufftCallbackLoadR") + store_types = ("x.y", "cufftComplex", "cufftCallbackStoreC") + else: + 
load_types = ("x", "cufftDoubleReal", "cufftCallbackLoadD") + store_types = ( + "x.y", + "cufftDoubleComplex", + "cufftCallbackStoreZ", + ) + else: # irfft + if dtype == np.complex64: + load_types = ("x.x", "cufftComplex", "cufftCallbackLoadC") + store_types = ("x", "cufftReal", "cufftCallbackStoreR") + else: + load_types = ("x.x", "cufftDoubleComplex", "cufftCallbackLoadZ") + store_types = ("x", "cufftDoubleReal", "cufftCallbackStoreD") + cb_load = _set_load_cb(load_code, *load_types) + cb_store = _set_store_cb(store_code, *store_types) + + a = testing.shaped_random(self.shape, xp, dtype) + if xp is np: + a.real *= 2.5 + out = fft(a, s=self.s, axes=self.axes, norm=self.norm) + if fft_func != "irfftn": + out.imag /= 3.8 + if dtype in (np.float32, np.complex64): + out = out.astype(np.complex64) + else: + out /= 3.8 + if dtype in (np.float32, np.complex64): + out = out.astype(np.float32) + else: + with use_temporary_cache_dir(): + with xp.fft.config.set_cufft_callbacks( + cb_load=cb_load, cb_store=cb_store + ): + out = fft(a, s=self.s, axes=self.axes, norm=self.norm) + + return out + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose( + rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_fftn_load_store(self, xp, dtype): + return self._test_load_store_helper(xp, dtype, "fftn") + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose( + rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_ifftn_load_store(self, xp, dtype): + return self._test_load_store_helper(xp, dtype, "ifftn") + + @testing.for_float_dtypes(no_float16=True) + @testing.numpy_cupy_allclose( + rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_rfftn_load_store(self, xp, dtype): + return self._test_load_store_helper(xp, dtype, "rfftn") + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose( + rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def 
test_irfftn_load_store(self, xp, dtype): + return self._test_load_store_helper(xp, dtype, "irfftn") + + def _test_load_store_aux_helper(self, xp, dtype, fft_func): + fft = getattr(xp.fft, fft_func) + load_code = _load_callback_with_aux + store_code = _store_callback_with_aux + if xp is cupy: + load_aux = xp.asarray(2.5, dtype=xp.dtype(dtype).char.lower()) + store_aux = xp.asarray(3.8, dtype=xp.dtype(dtype).char.lower()) + + if fft_func in ("fftn", "ifftn"): + if dtype == np.complex64: + load_types = ( + "x.x", + "cufftComplex", + "cufftCallbackLoadC", + "float", + ) + store_types = ( + "x.y", + "cufftComplex", + "cufftCallbackStoreC", + "float", + ) + else: + load_types = ( + "x.x", + "cufftDoubleComplex", + "cufftCallbackLoadZ", + "double", + ) + store_types = ( + "x.y", + "cufftDoubleComplex", + "cufftCallbackStoreZ", + "double", + ) + elif fft_func == "rfftn": + if dtype == np.float32: + load_types = ("x", "cufftReal", "cufftCallbackLoadR", "float") + store_types = ( + "x.y", + "cufftComplex", + "cufftCallbackStoreC", + "float", + ) + else: + load_types = ( + "x", + "cufftDoubleReal", + "cufftCallbackLoadD", + "double", + ) + store_types = ( + "x.y", + "cufftDoubleComplex", + "cufftCallbackStoreZ", + "double", + ) + else: # irfftn + if dtype == np.complex64: + load_types = ( + "x.x", + "cufftComplex", + "cufftCallbackLoadC", + "float", + ) + store_types = ("x", "cufftReal", "cufftCallbackStoreR", "float") + else: + load_types = ( + "x.x", + "cufftDoubleComplex", + "cufftCallbackLoadZ", + "double", + ) + store_types = ( + "x", + "cufftDoubleReal", + "cufftCallbackStoreD", + "double", + ) + cb_load = _set_load_cb(load_code, *load_types) + cb_store = _set_store_cb(store_code, *store_types) + + a = testing.shaped_random(self.shape, xp, dtype) + if xp is np: + a.real *= 2.5 + out = fft(a, s=self.s, axes=self.axes, norm=self.norm) + if fft_func != "irfftn": + out.imag /= 3.8 + if dtype in (np.float32, np.complex64): + out = out.astype(np.complex64) + else: + out /= 
3.8 + if dtype in (np.float32, np.complex64): + out = out.astype(np.float32) + else: + with use_temporary_cache_dir(): + with xp.fft.config.set_cufft_callbacks( + cb_load=cb_load, + cb_store=cb_store, + cb_load_aux_arr=load_aux, + cb_store_aux_arr=store_aux, + ): + out = fft(a, s=self.s, axes=self.axes, norm=self.norm) + + return out + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose( + rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_fftn_load_store_aux(self, xp, dtype): + return self._test_load_store_aux_helper(xp, dtype, "fftn") + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose( + rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_ifftn_load_store_aux(self, xp, dtype): + return self._test_load_store_aux_helper(xp, dtype, "ifftn") + + @testing.for_float_dtypes(no_float16=True) + @testing.numpy_cupy_allclose( + rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_rfftn_load_store_aux(self, xp, dtype): + return self._test_load_store_aux_helper(xp, dtype, "rfftn") + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose( + rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_irfftn_load_store_aux(self, xp, dtype): + return self._test_load_store_aux_helper(xp, dtype, "irfftn") diff --git a/dpnp/tests/third_party/cupy/fft_tests/test_fft.py b/dpnp/tests/third_party/cupy/fft_tests/test_fft.py index 918b6e2a23f..d458c7fba30 100644 --- a/dpnp/tests/third_party/cupy/fft_tests/test_fft.py +++ b/dpnp/tests/third_party/cupy/fft_tests/test_fft.py @@ -1,4 +1,5 @@ import functools +import warnings import numpy as np import pytest @@ -6,6 +7,7 @@ import dpnp as cupy from dpnp.tests.helper import has_support_aspect64 from dpnp.tests.third_party.cupy import testing +from dpnp.tests.third_party.cupy.testing._loops import _wraps_partial @pytest.fixture @@ -15,20 +17,102 @@ def skip_forward_backward(request): 
pytest.skip("forward/backward is supported by NumPy 1.20+") +def nd_planning_states(states=[True, False], name="enable_nd"): + """Decorator for parameterized tests with and without nd planning + + Tests are repeated with config.enable_nd_planning set to True and False + + Args: + states(list of bool): The boolean cases to test. + name(str): Argument name to which specified dtypes are passed. + + This decorator adds a keyword argument specified by ``name`` + to the test fixture. Then, it runs the fixtures in parallel + by passing the each element of ``dtypes`` to the named + argument. + """ + + def decorator(impl): + @_wraps_partial(impl, name) + def test_func(self, *args, **kw): + # get original global planning state + # planning_state = config.enable_nd_planning + try: + for nd_planning in states: + try: + # enable or disable nd planning + # config.enable_nd_planning = nd_planning + + kw[name] = nd_planning + impl(self, *args, **kw) + except Exception: + print(name, "is", nd_planning) + raise + finally: + # restore original global planning state + # config.enable_nd_planning = planning_state + pass + + return test_func + + return decorator + + +def multi_gpu_config(gpu_configs=None): + """Decorator for parameterized tests with different GPU configurations. + + Args: + gpu_configs (list of list): The GPUs to test. + + .. notes: + The decorated tests are skipped if no or only one GPU is available. 
+ """ + + def decorator(impl): + @functools.wraps(impl) + def test_func(self, *args, **kw): + use_multi_gpus = config.use_multi_gpus + _devices = config._devices + + try: + for gpus in gpu_configs: + try: + nGPUs = len(gpus) + assert nGPUs >= 2, "Must use at least two gpus" + config.use_multi_gpus = True + config.set_cufft_gpus(gpus) + self.gpus = gpus + + impl(self, *args, **kw) + except Exception: + print("GPU config is:", gpus) + raise + finally: + config.use_multi_gpus = use_multi_gpus + config._devices = _devices + del self.gpus + + return test_func + + return decorator + + +# @testing.with_requires("numpy>=2.0") @pytest.mark.usefixtures("skip_forward_backward") @testing.parameterize( *testing.product( { "n": [None, 0, 5, 10, 15], "shape": [(0,), (10, 0), (10,), (10, 10)], - "norm": [None, "backward", "ortho", "forward", ""], + "norm": [None, "backward", "ortho", "forward"], } ) ) class TestFft: + @testing.for_all_dtypes() @testing.numpy_cupy_allclose( - rtol=1e-4, + rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False, @@ -36,17 +120,11 @@ class TestFft: ) def test_fft(self, xp, dtype): a = testing.shaped_random(self.shape, xp, dtype) - out = xp.fft.fft(a, n=self.n, norm=self.norm) - - # np.fft.fft always returns np.complex128 - if xp is np and dtype in [np.float16, np.float32, np.complex64]: - out = out.astype(np.complex64) - - return out + return xp.fft.fft(a, n=self.n, norm=self.norm) @testing.for_all_dtypes() @testing.numpy_cupy_allclose( - rtol=1e-4, + rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False, @@ -57,14 +135,10 @@ def test_fft(self, xp, dtype): @testing.with_requires("numpy!=1.17.1") def test_ifft(self, xp, dtype): a = testing.shaped_random(self.shape, xp, dtype) - out = xp.fft.ifft(a, n=self.n, norm=self.norm) - - if xp is np and dtype in [np.float16, np.float32, np.complex64]: - out = out.astype(np.complex64) - - return out + return xp.fft.ifft(a, n=self.n, norm=self.norm) +# 
@testing.with_requires("numpy>=2.0") @testing.parameterize( *testing.product( { @@ -75,10 +149,11 @@ def test_ifft(self, xp, dtype): ) ) class TestFftOrder: + @testing.for_all_dtypes() @testing.numpy_cupy_allclose( - rtol=1e-4, - atol=1e-6, + rtol=1e-3, + atol=1e-7, accept_error=ValueError, contiguous_check=False, type_check=has_support_aspect64(), @@ -87,17 +162,11 @@ def test_fft(self, xp, dtype): a = testing.shaped_random(self.shape, xp, dtype) if self.data_order == "F": a = xp.asfortranarray(a) - out = xp.fft.fft(a, axis=self.axis) - - # np.fft.fft always returns np.complex128 - if xp is np and dtype in [np.float16, np.float32, np.complex64]: - out = out.astype(np.complex64) - - return out + return xp.fft.fft(a, axis=self.axis) @testing.for_all_dtypes() @testing.numpy_cupy_allclose( - rtol=1e-4, + rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False, @@ -107,123 +176,308 @@ def test_ifft(self, xp, dtype): a = testing.shaped_random(self.shape, xp, dtype) if self.data_order == "F": a = xp.asfortranarray(a) - out = xp.fft.ifft(a, axis=self.axis) + return xp.fft.ifft(a, axis=self.axis) + + +# See #3757 and NVIDIA internal ticket 3093094 +def _skip_multi_gpu_bug(shape, gpus): + # avoid CUDA 11.0 (will be fixed by CUDA 11.2) bug triggered by + # - batch = 1 + # - gpus = [1, 0] + if ( + 11000 <= cupy.cuda.runtime.runtimeGetVersion() < 11020 + and len(shape) == 1 + and gpus == [1, 0] + ): + pytest.skip("avoid CUDA 11 bug") + + +# Almost identical to the TestFft class, except that +# 1. multi-GPU cuFFT is used +# 2. 
the tested parameter combinations are adjusted to meet the requirements +@testing.with_requires("numpy>=2.0") +@pytest.mark.usefixtures("skip_forward_backward") +@testing.parameterize( + *testing.product( + { + "n": [None, 0, 64], + "shape": [(0,), (0, 10), (64,), (4, 64)], + "norm": [None, "backward", "ortho", "forward"], + } + ) +) +@pytest.mark.skip("multi GPU is not supported") +@testing.multi_gpu(2) +class TestMultiGpuFft: - if xp is np and dtype in [np.float16, np.float32, np.complex64]: - out = out.astype(np.complex64) + @multi_gpu_config(gpu_configs=[[0, 1], [1, 0]]) + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose( + rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_fft(self, xp, dtype): + _skip_multi_gpu_bug(self.shape, self.gpus) - return out + a = testing.shaped_random(self.shape, xp, dtype) + return xp.fft.fft(a, n=self.n, norm=self.norm) + + @multi_gpu_config(gpu_configs=[[0, 1], [1, 0]]) + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose( + rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + # NumPy 1.17.0 and 1.17.1 raises ZeroDivisonError due to a bug + @testing.with_requires("numpy!=1.17.0") + @testing.with_requires("numpy!=1.17.1") + def test_ifft(self, xp, dtype): + _skip_multi_gpu_bug(self.shape, self.gpus) + + a = testing.shaped_random(self.shape, xp, dtype) + return xp.fft.ifft(a, n=self.n, norm=self.norm) + + +# Almost identical to the TestFftOrder class, except that +# 1. multi-GPU cuFFT is used +# 2. 
the tested parameter combinations are adjusted to meet the requirements +@testing.with_requires("numpy>=2.0") +@testing.parameterize( + *testing.product( + { + "shape": [(10, 10), (10, 5, 10)], + "data_order": ["F", "C"], + "axis": [0, 1, -1], + } + ) +) +@pytest.mark.skip("multi GPU is not supported") +@testing.multi_gpu(2) +class TestMultiGpuFftOrder: + @multi_gpu_config(gpu_configs=[[0, 1], [1, 0]]) + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose( + rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_fft(self, xp, dtype): + _skip_multi_gpu_bug(self.shape, self.gpus) + + a = testing.shaped_random(self.shape, xp, dtype) + if self.data_order == "F": + a = xp.asfortranarray(a) + return xp.fft.fft(a, axis=self.axis) + @multi_gpu_config(gpu_configs=[[0, 1], [1, 0]]) + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose( + rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_ifft(self, xp, dtype): + _skip_multi_gpu_bug(self.shape, self.gpus) + a = testing.shaped_random(self.shape, xp, dtype) + if self.data_order == "F": + a = xp.asfortranarray(a) + return xp.fft.ifft(a, axis=self.axis) + + +@pytest.mark.skip("default FFT function is not supported") +@testing.with_requires("numpy>=2.0") +class TestDefaultPlanType: + + @nd_planning_states() + def test_default_fft_func(self, enable_nd): + # test cases where nd cuFFT plan is possible + ca = cupy.ones((16, 16, 16)) + for axes in [(0, 1), (1, 2), None, (0, 1, 2)]: + fft_func = _default_fft_func(ca, axes=axes) + if enable_nd: + # TODO(leofang): test newer ROCm versions + if axes == (0, 1) and cupy.cuda.runtime.is_hip: + assert fft_func is _fft + else: + assert fft_func is _fftn + else: + assert fft_func is _fft + + # only a single axis is transformed -> 1d plan preferred + for axes in [(0,), (1,), (2,)]: + assert _default_fft_func(ca, axes=axes) is _fft + + # non-contiguous axes -> nd plan not possible + assert _default_fft_func(ca, 
axes=(0, 2)) is _fft + + # >3 axes transformed -> nd plan not possible + ca = cupy.ones((2, 4, 6, 8)) + assert _default_fft_func(ca) is _fft + + # first or last axis not included -> nd plan not possible + assert _default_fft_func(ca, axes=(1,)) is _fft + + # for rfftn + ca = cupy.random.random((4, 2, 6)) + for s, axes in zip([(3, 4), None, (8, 7, 5)], [(-2, -1), (0, 1), None]): + fft_func = _default_fft_func(ca, s=s, axes=axes, value_type="R2C") + if enable_nd: + # TODO(leofang): test newer ROCm versions + if axes == (0, 1) and cupy.cuda.runtime.is_hip: + assert fft_func is _fft + else: + assert fft_func is _fftn + else: + assert fft_func is _fft + + # nd plan not possible if last axis is not 0 or ndim-1 + assert _default_fft_func(ca, axes=(2, 1), value_type="R2C") is _fft + + # for irfftn + ca = cupy.random.random((4, 2, 6)).astype(cupy.complex128) + for s, axes in zip([(3, 4), None, (8, 7, 5)], [(-2, -1), (0, 1), None]): + fft_func = _default_fft_func(ca, s=s, axes=axes, value_type="C2R") + if enable_nd: + # To get around hipFFT's bug, we don't use PlanNd for C2R + # TODO(leofang): test newer ROCm versions + if cupy.cuda.runtime.is_hip: + assert fft_func is _fft + else: + assert fft_func is _fftn + else: + assert fft_func is _fft + + # nd plan not possible if last axis is not 0 or ndim-1 + assert _default_fft_func(ca, axes=(2, 1), value_type="C2R") is _fft + + +@pytest.mark.skip("memory management is not supported") +@testing.with_requires("numpy>=2.0") +@testing.slow +class TestFftAllocate: + + def test_fft_allocate(self): + # Check CuFFTError is not raised when the GPU memory is enough. + # See https://github.com/cupy/cupy/issues/1063 + # TODO(mizuno): Simplify "a" after memory compaction is implemented. 
+ a = [] + for i in range(10): + a.append(cupy.empty(100000000)) + del a + b = cupy.empty(100000007, dtype=cupy.float32) + cupy.fft.fft(b) + # Free huge memory for slow test + del b + cupy.get_default_memory_pool().free_all_blocks() + # Clean up FFT plan cache + cupy.fft.config.clear_plan_cache() + + +# @testing.with_requires("numpy>=2.0") @pytest.mark.usefixtures("skip_forward_backward") @testing.parameterize( *( testing.product_dict( [ - # some of the following cases are modified, since in NumPy 2.0.0 - # `s` must contain only integer `s`, not None values, and - # If `s` is not None, `axes` must not be None either. {"shape": (3, 4), "s": None, "axes": None}, - {"shape": (3, 4), "s": (1, 4), "axes": (0, 1)}, - {"shape": (3, 4), "s": (1, 5), "axes": (0, 1)}, + {"shape": (3, 4), "s": (1, 5), "axes": (-2, -1)}, {"shape": (3, 4), "s": None, "axes": (-2, -1)}, {"shape": (3, 4), "s": None, "axes": (-1, -2)}, # {"shape": (3, 4), "s": None, "axes": (0,)}, # mkl_fft gh-109 + {"shape": (3, 4), "s": None, "axes": None}, # {"shape": (3, 4), "s": None, "axes": ()}, # mkl_fft gh-108 {"shape": (2, 3, 4), "s": None, "axes": None}, - {"shape": (2, 3, 4), "s": (1, 4, 4), "axes": (0, 1, 2)}, - {"shape": (2, 3, 4), "s": (1, 4, 10), "axes": (0, 1, 2)}, + {"shape": (2, 3, 4), "s": (1, 4, 10), "axes": (-2, -1)}, {"shape": (2, 3, 4), "s": None, "axes": (-3, -2, -1)}, {"shape": (2, 3, 4), "s": None, "axes": (-1, -2, -3)}, # {"shape": (2, 3, 4), "s": None, "axes": (0, 1)}, # mkl_fft gh-109 + {"shape": (2, 3, 4), "s": None, "axes": None}, # {"shape": (2, 3, 4), "s": None, "axes": ()}, # mkl_fft gh-108 # {"shape": (2, 3, 4), "s": (2, 3), "axes": (0, 1, 2)}, # mkl_fft gh-109 {"shape": (2, 3, 4, 5), "s": None, "axes": None}, # {"shape": (0, 5), "s": None, "axes": None}, # mkl_fft gh-110 # {"shape": (2, 0, 5), "s": None, "axes": None}, # mkl_fft gh-110 # {"shape": (0, 0, 5), "s": None, "axes": None}, # mkl_fft gh-110 - {"shape": (3, 4), "s": (0, 5), "axes": (0, 1)}, - {"shape": (3, 4), "s": (1, 
0), "axes": (0, 1)}, + {"shape": (3, 4), "s": (0, 5), "axes": (-2, -1)}, + {"shape": (3, 4), "s": (1, 0), "axes": (-2, -1)}, ], - testing.product( - {"norm": [None, "backward", "ortho", "forward", ""]} - ), + testing.product({"norm": [None, "backward", "ortho", "forward"]}), ) ) ) class TestFft2: + + @nd_planning_states() @testing.for_orders("CF") @testing.for_all_dtypes() @testing.numpy_cupy_allclose( - rtol=1e-4, + rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False, type_check=has_support_aspect64(), ) - def test_fft2(self, xp, dtype, order): + def test_fft2(self, xp, dtype, order, enable_nd): + # assert config.enable_nd_planning == enable_nd a = testing.shaped_random(self.shape, xp, dtype) if order == "F": a = xp.asfortranarray(a) - out = xp.fft.fft2(a, s=self.s, axes=self.axes, norm=self.norm) + + with warnings.catch_warnings(): + # axis=None and s != None, NumPy 2.0 + warnings.simplefilter("ignore", DeprecationWarning) + out = xp.fft.fft2(a, s=self.s, axes=self.axes, norm=self.norm) if self.axes is not None and not self.axes: assert out is a return out - if xp is np and dtype in [np.float16, np.float32, np.complex64]: - out = out.astype(np.complex64) - return out + @nd_planning_states() @testing.for_orders("CF") @testing.for_all_dtypes() @testing.numpy_cupy_allclose( - rtol=1e-4, + rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False, type_check=has_support_aspect64(), ) - def test_ifft2(self, xp, dtype, order): + def test_ifft2(self, xp, dtype, order, enable_nd): + # assert config.enable_nd_planning == enable_nd a = testing.shaped_random(self.shape, xp, dtype) if order == "F": a = xp.asfortranarray(a) - out = xp.fft.ifft2(a, s=self.s, axes=self.axes, norm=self.norm) + + with warnings.catch_warnings(): + # axis=None and s != None, NumPy 2.0 + warnings.simplefilter("ignore", DeprecationWarning) + out = xp.fft.ifft2(a, s=self.s, axes=self.axes, norm=self.norm) if self.axes is not None and not self.axes: assert out is a return 
out - if xp is np and dtype in [np.float16, np.float32, np.complex64]: - out = out.astype(np.complex64) - return out +# @testing.with_requires("numpy>=2.0") @pytest.mark.usefixtures("skip_forward_backward") @testing.parameterize( *( testing.product_dict( [ - # some of the following cases are modified, since in NumPy 2.0.0 - # `s` must contain only integer `s`, not None values, and - # If `s` is not None, `axes` must not be None either. {"shape": (3, 4), "s": None, "axes": None}, - {"shape": (3, 4), "s": (1, 4), "axes": (0, 1)}, - {"shape": (3, 4), "s": (1, 5), "axes": (0, 1)}, + {"shape": (3, 4), "s": (1, 5), "axes": (-2, -1)}, {"shape": (3, 4), "s": None, "axes": (-2, -1)}, {"shape": (3, 4), "s": None, "axes": (-1, -2)}, {"shape": (3, 4), "s": None, "axes": [-1, -2]}, # {"shape": (3, 4), "s": None, "axes": (0,)}, # mkl_fft gh-109 # {"shape": (3, 4), "s": None, "axes": ()}, # mkl_fft gh-108 + {"shape": (3, 4), "s": None, "axes": None}, {"shape": (2, 3, 4), "s": None, "axes": None}, - {"shape": (2, 3, 4), "s": (1, 4, 4), "axes": (0, 1, 2)}, - {"shape": (2, 3, 4), "s": (1, 4, 10), "axes": (0, 1, 2)}, + {"shape": (2, 3, 4), "s": (1, 4, 10), "axes": (-3, -2, -1)}, {"shape": (2, 3, 4), "s": None, "axes": (-3, -2, -1)}, {"shape": (2, 3, 4), "s": None, "axes": (-1, -2, -3)}, # {"shape": (2, 3, 4), "s": None, "axes": (-1, -3)}, # mkl_fft gh-109 # {"shape": (2, 3, 4), "s": None, "axes": (0, 1)}, # mkl_fft gh-109 + {"shape": (2, 3, 4), "s": None, "axes": None}, # {"shape": (2, 3, 4), "s": None, "axes": ()}, # mkl_fft gh-108 # {"shape": (2, 3, 4), "s": (2, 3), "axes": (0, 1, 2)}, # mkl_fft gh-109 {"shape": (2, 3, 4), "s": (4, 3, 2), "axes": (2, 0, 1)}, @@ -232,62 +486,402 @@ def test_ifft2(self, xp, dtype, order): # {"shape": (2, 0, 5), "s": None, "axes": None}, # mkl_fft gh-110 # {"shape": (0, 0, 5), "s": None, "axes": None}, # mkl_fft gh-110 ], - testing.product( - {"norm": [None, "backward", "ortho", "forward", ""]} - ), + testing.product({"norm": [None, "backward", 
"ortho", "forward"]}), ) ) ) class TestFftn: + + @nd_planning_states() @testing.for_orders("CF") @testing.for_all_dtypes() @testing.numpy_cupy_allclose( - rtol=1e-4, + rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False, type_check=has_support_aspect64(), ) - def test_fftn(self, xp, dtype, order): + def test_fftn(self, xp, dtype, order, enable_nd): + # assert config.enable_nd_planning == enable_nd a = testing.shaped_random(self.shape, xp, dtype) if order == "F": a = xp.asfortranarray(a) - out = xp.fft.fftn(a, s=self.s, axes=self.axes, norm=self.norm) + + with warnings.catch_warnings(): + # axis=None and s != None, NumPy 2.0 + warnings.simplefilter("ignore", DeprecationWarning) + out = xp.fft.fftn(a, s=self.s, axes=self.axes, norm=self.norm) if self.axes is not None and not self.axes: assert out is a return out - if xp is np and dtype in [np.float16, np.float32, np.complex64]: - out = out.astype(np.complex64) - return out + @nd_planning_states() @testing.for_orders("CF") @testing.for_all_dtypes() @testing.numpy_cupy_allclose( - rtol=1e-4, + rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False, type_check=has_support_aspect64(), ) - def test_ifftn(self, xp, dtype, order): + def test_ifftn(self, xp, dtype, order, enable_nd): + # assert config.enable_nd_planning == enable_nd a = testing.shaped_random(self.shape, xp, dtype) if order == "F": a = xp.asfortranarray(a) - out = xp.fft.ifftn(a, s=self.s, axes=self.axes, norm=self.norm) + + with warnings.catch_warnings(): + # axis=None and s != None, NumPy 2.0 + warnings.simplefilter("ignore", DeprecationWarning) + out = xp.fft.ifftn(a, s=self.s, axes=self.axes, norm=self.norm) if self.axes is not None and not self.axes: assert out is a return out - if xp is np and dtype in [np.float16, np.float32, np.complex64]: - out = out.astype(np.complex64) - return out +@pytest.mark.skip("get_fft_plan() is not supported") +@testing.with_requires("numpy>=2.0") 
+@pytest.mark.usefixtures("skip_forward_backward") +@testing.parameterize( + *( + testing.product_dict( + [ + {"shape": (3, 4), "s": None, "axes": None}, + {"shape": (3, 4), "s": (1, 5), "axes": (-2, -1)}, + {"shape": (3, 4), "s": None, "axes": (-2, -1)}, + {"shape": (3, 4), "s": None, "axes": (-1, -2)}, + {"shape": (3, 4), "s": None, "axes": (0,)}, + {"shape": (3, 4), "s": None, "axes": None}, + {"shape": (2, 3, 4), "s": None, "axes": None}, + {"shape": (2, 3, 4), "s": (1, 4, 10), "axes": (-3, -2, -1)}, + {"shape": (2, 3, 4), "s": None, "axes": (-3, -2, -1)}, + {"shape": (2, 3, 4), "s": None, "axes": (-1, -2, -3)}, + {"shape": (2, 3, 4), "s": None, "axes": (0, 1)}, + {"shape": (2, 3, 4), "s": None, "axes": None}, + {"shape": (2, 3, 4), "s": (2, 3), "axes": (-2, -1)}, + {"shape": (2, 3, 4), "s": (2, 3), "axes": (0, 1, 2)}, + {"shape": (0, 5), "s": None, "axes": None}, + {"shape": (2, 0, 5), "s": None, "axes": None}, + {"shape": (0, 0, 5), "s": None, "axes": None}, + ], + testing.product({"norm": [None, "backward", "ortho", "forward"]}), + ) + ) +) +class TestPlanCtxManagerFftn: + + @pytest.fixture(autouse=True) + def skip_buggy(self): + if cupy.cuda.runtime.is_hip: + # TODO(leofang): test newer ROCm versions + if self.axes == (0, 1) and self.shape == (2, 3, 4): + pytest.skip( + "hipFFT's PlanNd for this case " + "is buggy, so Plan1d is generated " + "instead" + ) + + @nd_planning_states() + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose( + rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_fftn(self, xp, dtype, enable_nd): + assert config.enable_nd_planning == enable_nd + a = testing.shaped_random(self.shape, xp, dtype) + + if xp is np: + return xp.fft.fftn(a, s=self.s, axes=self.axes, norm=self.norm) + + from cupyx.scipy.fftpack import get_fft_plan + + plan = get_fft_plan(a, self.s, self.axes) + with plan: + return xp.fft.fftn(a, s=self.s, axes=self.axes, norm=self.norm) + + @nd_planning_states() + 
@testing.for_complex_dtypes() + @testing.numpy_cupy_allclose( + rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_ifftn(self, xp, dtype, enable_nd): + assert config.enable_nd_planning == enable_nd + a = testing.shaped_random(self.shape, xp, dtype) + + if xp is np: + return xp.fft.ifftn(a, s=self.s, axes=self.axes, norm=self.norm) + + from cupyx.scipy.fftpack import get_fft_plan + + plan = get_fft_plan(a, self.s, self.axes) + with plan: + return xp.fft.ifftn(a, s=self.s, axes=self.axes, norm=self.norm) + + @nd_planning_states() + @testing.for_complex_dtypes() + def test_fftn_error_on_wrong_plan(self, dtype, enable_nd): + if 0 in self.shape: + pytest.skip("0 in shape") + # This test ensures the context manager plan is picked up + + from cupy.fft import fftn + from cupyx.scipy.fftpack import get_fft_plan + + assert config.enable_nd_planning == enable_nd + + # can't get a plan, so skip + if self.axes is not None: + if self.s is not None: + if len(self.s) != len(self.axes): + return + elif len(self.shape) != len(self.axes): + return + + a = testing.shaped_random(self.shape, cupy, dtype) + bad_in_shape = tuple(2 * i for i in self.shape) + if self.s is None: + bad_out_shape = bad_in_shape + else: + bad_out_shape = tuple(2 * i for i in self.s) + b = testing.shaped_random(bad_in_shape, cupy, dtype) + plan_wrong = get_fft_plan(b, bad_out_shape, self.axes) + + with pytest.raises(ValueError) as ex, plan_wrong: + fftn(a, s=self.s, axes=self.axes, norm=self.norm) + # targeting a particular error + assert "The cuFFT plan and a.shape do not match" in str(ex.value) + + +@pytest.mark.skip("get_fft_plan() is not supported") +@testing.with_requires("numpy>=2.0") +@pytest.mark.usefixtures("skip_forward_backward") +@testing.parameterize( + *testing.product( + { + "n": [None, 5, 10, 15], + "shape": [ + (10,), + ], + "norm": [None, "backward", "ortho", "forward"], + } + ) +) +class TestPlanCtxManagerFft: + + @testing.for_complex_dtypes() + 
@testing.numpy_cupy_allclose( + rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_fft(self, xp, dtype): + a = testing.shaped_random(self.shape, xp, dtype) + + if xp is np: + return xp.fft.fft(a, n=self.n, norm=self.norm) + + from cupyx.scipy.fftpack import get_fft_plan + + shape = (self.n,) if self.n is not None else None + plan = get_fft_plan(a, shape=shape) + assert isinstance(plan, cupy.cuda.cufft.Plan1d) + with plan: + return xp.fft.fft(a, n=self.n, norm=self.norm) + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose( + rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_ifft(self, xp, dtype): + a = testing.shaped_random(self.shape, xp, dtype) + + if xp is np: + return xp.fft.ifft(a, n=self.n, norm=self.norm) + + from cupyx.scipy.fftpack import get_fft_plan + + shape = (self.n,) if self.n is not None else None + plan = get_fft_plan(a, shape=shape) + assert isinstance(plan, cupy.cuda.cufft.Plan1d) + with plan: + return xp.fft.ifft(a, n=self.n, norm=self.norm) + + @testing.for_complex_dtypes() + def test_fft_error_on_wrong_plan(self, dtype): + # This test ensures the context manager plan is picked up + + from cupy.fft import fft + from cupyx.scipy.fftpack import get_fft_plan + + a = testing.shaped_random(self.shape, cupy, dtype) + bad_shape = tuple(5 * i for i in self.shape) + b = testing.shaped_random(bad_shape, cupy, dtype) + plan_wrong = get_fft_plan(b) + assert isinstance(plan_wrong, cupy.cuda.cufft.Plan1d) + + with pytest.raises(ValueError) as ex, plan_wrong: + fft(a, n=self.n, norm=self.norm) + # targeting a particular error + assert "Target array size does not match the plan." in str(ex.value) + + +# Almost identical to the TestPlanCtxManagerFft class, except that +# 1. multi-GPU cuFFT is used +# 2. 
the tested parameter combinations are adjusted to meet the requirements +@testing.with_requires("numpy>=2.0") +@pytest.mark.usefixtures("skip_forward_backward") +@testing.parameterize( + *testing.product( + { + "n": [None, 64], + "shape": [(64,), (128,)], + "norm": [None, "backward", "ortho", "forward", ""], + } + ) +) +@pytest.mark.skip("get_fft_plan() is not supported") +@testing.multi_gpu(2) +class TestMultiGpuPlanCtxManagerFft: + + @multi_gpu_config(gpu_configs=[[0, 1], [1, 0]]) + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose( + rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_fft(self, xp, dtype): + _skip_multi_gpu_bug(self.shape, self.gpus) + + a = testing.shaped_random(self.shape, xp, dtype) + + if xp is np: + return xp.fft.fft(a, n=self.n, norm=self.norm) + + from cupyx.scipy.fftpack import get_fft_plan + + shape = (self.n,) if self.n is not None else None + plan = get_fft_plan(a, shape=shape) + assert isinstance(plan, cupy.cuda.cufft.Plan1d) + with plan: + return xp.fft.fft(a, n=self.n, norm=self.norm) + + @multi_gpu_config(gpu_configs=[[0, 1], [1, 0]]) + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose( + rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_ifft(self, xp, dtype): + _skip_multi_gpu_bug(self.shape, self.gpus) + + a = testing.shaped_random(self.shape, xp, dtype) + + if xp is np: + return xp.fft.ifft(a, n=self.n, norm=self.norm) + + from cupyx.scipy.fftpack import get_fft_plan + + shape = (self.n,) if self.n is not None else None + plan = get_fft_plan(a, shape=shape) + assert isinstance(plan, cupy.cuda.cufft.Plan1d) + with plan: + return xp.fft.ifft(a, n=self.n, norm=self.norm) + + @multi_gpu_config(gpu_configs=[[0, 1], [1, 0]]) + @testing.for_complex_dtypes() + def test_fft_error_on_wrong_plan(self, dtype): + # This test ensures the context manager plan is picked up + + from cupy.fft import fft + from cupyx.scipy.fftpack import get_fft_plan + + a = 
testing.shaped_random(self.shape, cupy, dtype) + bad_shape = tuple(4 * i for i in self.shape) + b = testing.shaped_random(bad_shape, cupy, dtype) + plan_wrong = get_fft_plan(b) + assert isinstance(plan_wrong, cupy.cuda.cufft.Plan1d) + + with pytest.raises(ValueError) as ex, plan_wrong: + fft(a, n=self.n, norm=self.norm) + # targeting a particular error + if self.norm == "": + # if norm is invalid, we still get ValueError, but it's raised + # when checking norm, earlier than the plan check + return # skip + assert "Target array size does not match the plan." in str(ex.value) + + +@testing.with_requires("numpy>=2.0") +@pytest.mark.usefixtures("skip_forward_backward") +@testing.parameterize( + *( + testing.product_dict( + [ + {"shape": (3, 4), "s": None, "axes": None}, + {"shape": (3, 4), "s": None, "axes": (-2, -1)}, + {"shape": (3, 4), "s": None, "axes": (-1, -2)}, + {"shape": (3, 4), "s": None, "axes": (0,)}, + {"shape": (3, 4), "s": None, "axes": None}, + {"shape": (2, 3, 4), "s": (1, 4, None), "axes": None}, + {"shape": (2, 3, 4), "s": (1, 4, 10), "axes": None}, + {"shape": (2, 3, 4), "s": None, "axes": (-3, -2, -1)}, + {"shape": (2, 3, 4), "s": None, "axes": (-1, -2, -3)}, + {"shape": (2, 3, 4), "s": None, "axes": (0, 1)}, + {"shape": (2, 3, 4), "s": None, "axes": None}, + {"shape": (2, 3, 4, 5), "s": None, "axes": (-3, -2, -1)}, + ], + testing.product( + {"norm": [None, "backward", "ortho", "forward", ""]} + ), + ) + ) +) +@pytest.mark.skip("default FFT function is not supported") +class TestFftnContiguity: + + @nd_planning_states([True]) + @testing.for_all_dtypes() + def test_fftn_orders(self, dtype, enable_nd): + for order in ["C", "F"]: + a = testing.shaped_random(self.shape, cupy, dtype) + if order == "F": + a = cupy.asfortranarray(a) + out = cupy.fft.fftn(a, s=self.s, axes=self.axes) + + fft_func = _default_fft_func(a, s=self.s, axes=self.axes) + if fft_func is _fftn: + # nd plans have output with contiguity matching the input + assert 
out.flags.c_contiguous == a.flags.c_contiguous + assert out.flags.f_contiguous == a.flags.f_contiguous + else: + # 1d planning case doesn't guarantee preserved contiguity + pass + + @nd_planning_states([True]) + @testing.for_all_dtypes() + def test_ifftn_orders(self, dtype, enable_nd): + for order in ["C", "F"]: + + a = testing.shaped_random(self.shape, cupy, dtype) + if order == "F": + a = cupy.asfortranarray(a) + out = cupy.fft.ifftn(a, s=self.s, axes=self.axes) + + fft_func = _default_fft_func(a, s=self.s, axes=self.axes) + if fft_func is _fftn: + # nd plans have output with contiguity matching the input + assert out.flags.c_contiguous == a.flags.c_contiguous + assert out.flags.f_contiguous == a.flags.f_contiguous + else: + # 1d planning case doesn't guarantee preserved contiguity + pass + + +# @testing.with_requires("numpy>=2.0") @pytest.mark.usefixtures("skip_forward_backward") @testing.parameterize( *testing.product( @@ -299,9 +893,10 @@ def test_ifftn(self, xp, dtype, order): ) ) class TestRfft: + @testing.for_all_dtypes(no_complex=True) @testing.numpy_cupy_allclose( - rtol=1e-4, + rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False, @@ -309,17 +904,12 @@ class TestRfft: ) def test_rfft(self, xp, dtype): a = testing.shaped_random(self.shape, xp, dtype) - out = xp.fft.rfft(a, n=self.n, norm=self.norm) - - if xp is np and dtype in [np.float16, np.float32, np.complex64]: - out = out.astype(np.complex64) - - return out + return xp.fft.rfft(a, n=self.n, norm=self.norm) @testing.for_all_dtypes() @testing.numpy_cupy_allclose( - rtol=1e-4, - atol=2e-6, + rtol=1e-3, + atol=1e-7, accept_error=ValueError, contiguous_check=False, type_check=has_support_aspect64(), @@ -328,32 +918,106 @@ def test_irfft(self, xp, dtype): a = testing.shaped_random(self.shape, xp, dtype) out = xp.fft.irfft(a, n=self.n, norm=self.norm) - if xp is np and dtype in [np.float16, np.float32, np.complex64]: + if dtype == xp.float16 and xp is cupy: + # XXX: np2.0: f16 dtypes 
differ + out = out.astype(np.float16) + elif ( + xp is np + and np.lib.NumpyVersion(np.__version__) < "2.0.0" + and dtype == np.float32 + ): out = out.astype(np.float32) return out +@testing.with_requires("numpy>=2.0") +@pytest.mark.usefixtures("skip_forward_backward") +@testing.parameterize( + *testing.product( + { + "n": [None, 5, 10, 15], + "shape": [(10,)], + "norm": [None, "backward", "ortho", "forward"], + } + ) +) +@pytest.mark.skip("get_fft_plan() is not supported") +class TestPlanCtxManagerRfft: + + @testing.for_all_dtypes(no_complex=True) + @testing.numpy_cupy_allclose( + rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_rfft(self, xp, dtype): + a = testing.shaped_random(self.shape, xp, dtype) + + if xp is np: + return xp.fft.rfft(a, n=self.n, norm=self.norm) + + from cupyx.scipy.fftpack import get_fft_plan + + shape = (self.n,) if self.n is not None else None + plan = get_fft_plan(a, shape=shape, value_type="R2C") + assert isinstance(plan, cupy.cuda.cufft.Plan1d) + with plan: + return xp.fft.rfft(a, n=self.n, norm=self.norm) + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose( + rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_irfft(self, xp, dtype): + a = testing.shaped_random(self.shape, xp, dtype) + + if xp is np: + return xp.fft.irfft(a, n=self.n, norm=self.norm) + + from cupyx.scipy.fftpack import get_fft_plan + + shape = (self.n,) if self.n is not None else None + plan = get_fft_plan(a, shape=shape, value_type="C2R") + assert isinstance(plan, cupy.cuda.cufft.Plan1d) + with plan: + return xp.fft.irfft(a, n=self.n, norm=self.norm) + + @testing.for_all_dtypes(no_complex=True) + def test_rfft_error_on_wrong_plan(self, dtype): + # This test ensures the context manager plan is picked up + + from cupy.fft import rfft + from cupyx.scipy.fftpack import get_fft_plan + + a = testing.shaped_random(self.shape, cupy, dtype) + bad_shape = tuple(5 * i for i in self.shape) + b = 
testing.shaped_random(bad_shape, cupy, dtype) + plan_wrong = get_fft_plan(b, value_type="R2C") + assert isinstance(plan_wrong, cupy.cuda.cufft.Plan1d) + + with pytest.raises(ValueError) as ex, plan_wrong: + rfft(a, n=self.n, norm=self.norm) + # targeting a particular error + assert "Target array size does not match the plan." in str(ex.value) + + +# @testing.with_requires("numpy>=2.0") @pytest.mark.usefixtures("skip_forward_backward") @testing.parameterize( *( testing.product_dict( [ - # some of the following cases are modified, since in NumPy 2.0.0 - # `s` must contain only integer `s`, not None values, and - # If `s` is not None, `axes` must not be None either. {"shape": (3, 4), "s": None, "axes": None}, - {"shape": (3, 4), "s": (1, 4), "axes": (0, 1)}, - {"shape": (3, 4), "s": (1, 5), "axes": (0, 1)}, + {"shape": (3, 4), "s": (1, 5), "axes": (-2, -1)}, {"shape": (3, 4), "s": None, "axes": (-2, -1)}, {"shape": (3, 4), "s": None, "axes": (-1, -2)}, {"shape": (3, 4), "s": None, "axes": (0,)}, + {"shape": (3, 4), "s": None, "axes": None}, # {"shape": (2, 3, 4), "s": None, "axes": None}, # mkl_fft gh-116 - # {"shape": (2, 3, 4), "s": (1, 4, 4), "axes": (0, 1, 2)}, # mkl_fft gh-115 - # {"shape": (2, 3, 4), "s": (1, 4, 10), "axes": (0, 1, 2)}, # mkl_fft gh-115 + # {"shape": (2, 3, 4), "s": (1, 4, 10), "axes": (-3, -2, -1)}, # mkl_fft gh-115 # {"shape": (2, 3, 4), "s": None, "axes": (-3, -2, -1)}, # mkl_fft gh-116 # {"shape": (2, 3, 4), "s": None, "axes": (-1, -2, -3)}, # mkl_fft gh-116 {"shape": (2, 3, 4), "s": None, "axes": (0, 1)}, + {"shape": (2, 3, 4), "s": None, "axes": None}, {"shape": (2, 3, 4), "s": (2, 3), "axes": (0, 1, 2)}, # {"shape": (2, 3, 4, 5), "s": None, "axes": None}, # mkl_fft gh-109 and gh-116 ], @@ -364,54 +1028,60 @@ def test_irfft(self, xp, dtype): ) ) class TestRfft2: + + @nd_planning_states() @testing.for_orders("CF") @testing.for_all_dtypes(no_complex=True) @testing.numpy_cupy_allclose( - rtol=1e-4, + rtol=1e-3, atol=1e-7, 
accept_error=ValueError, contiguous_check=False, type_check=has_support_aspect64(), ) - def test_rfft2(self, xp, dtype, order): + def test_rfft2(self, xp, dtype, order, enable_nd): + # assert config.enable_nd_planning == enable_nd a = testing.shaped_random(self.shape, xp, dtype) if order == "F": a = xp.asfortranarray(a) - out = xp.fft.rfft2(a, s=self.s, axes=self.axes, norm=self.norm) - - if xp is np and dtype in [np.float16, np.float32, np.complex64]: - out = out.astype(np.complex64) - - return out + return xp.fft.rfft2(a, s=self.s, axes=self.axes, norm=self.norm) + @nd_planning_states() @testing.for_orders("CF") @testing.for_all_dtypes() @testing.numpy_cupy_allclose( - rtol=1e-4, + rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False, type_check=has_support_aspect64(), ) - def test_irfft2(self, xp, dtype, order): - if self.s is None and self.axes in [None, (-2, -1)]: + def test_irfft2(self, xp, dtype, order, enable_nd): + # assert config.enable_nd_planning == enable_nd + + if self.s is None and self.axes == None: pytest.skip("Input is not Hermitian Symmetric") + elif dtype == xp.float16 and xp is cupy: + pytest.xfail("XXX: np2.0: f16 dtypes differ") + elif ( + np.lib.NumpyVersion(np.__version__) < "2.0.0" + and dtype == np.float32 + ): + pytest.skip("dtypes differ") + a = testing.shaped_random(self.shape, xp, dtype) if order == "F": a = xp.asfortranarray(a) - out = xp.fft.irfft2(a, s=self.s, axes=self.axes, norm=self.norm) - - if xp is np and dtype in [np.float16, np.float32, np.complex64]: - out = out.astype(np.float32) - - return out + return xp.fft.irfft2(a, s=self.s, axes=self.axes, norm=self.norm) +# @testing.with_requires("numpy>=2.0") @testing.parameterize( {"shape": (3, 4), "s": None, "axes": (), "norm": None}, {"shape": (2, 3, 4), "s": None, "axes": (), "norm": None}, ) class TestRfft2EmptyAxes: + @testing.for_all_dtypes(no_complex=True) def test_rfft2(self, dtype): for xp in (np, cupy): @@ -427,26 +1097,24 @@ def test_irfft2(self, 
dtype): xp.fft.irfft2(a, s=self.s, axes=self.axes, norm=self.norm) +# @testing.with_requires("numpy>=2.0") @pytest.mark.usefixtures("skip_forward_backward") @testing.parameterize( *( testing.product_dict( [ - # some of the following cases are modified, since in NumPy 2.0.0 - # `s` must contain only integer `s`, not None values, and - # If `s` is not None, `axes` must not be None either. {"shape": (3, 4), "s": None, "axes": None}, - {"shape": (3, 4), "s": (1, 4), "axes": (0, 1)}, - {"shape": (3, 4), "s": (1, 5), "axes": (0, 1)}, + {"shape": (3, 4), "s": (1, 5), "axes": (-2, -1)}, {"shape": (3, 4), "s": None, "axes": (-2, -1)}, {"shape": (3, 4), "s": None, "axes": (-1, -2)}, {"shape": (3, 4), "s": None, "axes": (0,)}, - # {"shape": (2, 3, 4), "s": None, "axes": None}, # mkl_fft gh-116 - # {"shape": (2, 3, 4), "s": (1, 4, 4), "axes": (0, 1, 2)}, # mkl_fft gh-115 - # {"shape": (2, 3, 4), "s": (1, 4, 10), "axes": (0, 1, 2)}, # mkl_fft gh-115 + {"shape": (3, 4), "s": None, "axes": None}, + {"shape": (2, 3, 4), "s": None, "axes": None}, + # {"shape": (2, 3, 4), "s": (1, 4, 10), "axes": (-3, -2, -1)}, # mkl_fft gh-115 # {"shape": (2, 3, 4), "s": None, "axes": (-3, -2, -1)}, # mkl_fft gh-116 # {"shape": (2, 3, 4), "s": None, "axes": (-1, -2, -3)}, # mkl_fft gh-116 {"shape": (2, 3, 4), "s": None, "axes": (0, 1)}, + {"shape": (2, 3, 4), "s": None, "axes": None}, {"shape": (2, 3, 4), "s": (2, 3), "axes": (0, 1, 2)}, # {"shape": (2, 3, 4, 5), "s": None, "axes": None}, # mkl_fft gh-109 and gh-116 ], @@ -457,54 +1125,207 @@ def test_irfft2(self, dtype): ) ) class TestRfftn: + + @nd_planning_states() @testing.for_orders("CF") @testing.for_all_dtypes(no_complex=True) @testing.numpy_cupy_allclose( - rtol=1e-4, + rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False, type_check=has_support_aspect64(), ) - def test_rfftn(self, xp, dtype, order): + def test_rfftn(self, xp, dtype, order, enable_nd): + # assert config.enable_nd_planning == enable_nd a = 
testing.shaped_random(self.shape, xp, dtype) if order == "F": a = xp.asfortranarray(a) - out = xp.fft.rfftn(a, s=self.s, axes=self.axes, norm=self.norm) - - if xp is np and dtype in [np.float16, np.float32, np.complex64]: - out = out.astype(np.complex64) - - return out + return xp.fft.rfftn(a, s=self.s, axes=self.axes, norm=self.norm) + @nd_planning_states() @testing.for_orders("CF") @testing.for_all_dtypes() @testing.numpy_cupy_allclose( - rtol=1e-4, + rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False, type_check=has_support_aspect64(), ) - def test_irfftn(self, xp, dtype, order): - if self.s is None and self.axes in [None, (-2, -1)]: + def test_irfftn(self, xp, dtype, order, enable_nd): + # assert config.enable_nd_planning == enable_nd + + if self.s is None and self.axes == None: pytest.skip("Input is not Hermitian Symmetric") + elif dtype == xp.float16 and xp is cupy: + pytest.xfail("XXX: np2.0: f16 dtypes differ") + elif ( + np.lib.NumpyVersion(np.__version__) < "2.0.0" + and dtype == np.float32 + ): + pytest.skip("dtypes differ") + a = testing.shaped_random(self.shape, xp, dtype) if order == "F": a = xp.asfortranarray(a) - out = xp.fft.irfftn(a, s=self.s, axes=self.axes, norm=self.norm) + return xp.fft.irfftn(a, s=self.s, axes=self.axes, norm=self.norm) - if xp is np and dtype in [np.float16, np.float32, np.complex64]: - out = out.astype(np.float32) - return out +# Only those tests in which a legit plan can be obtained are kept +@testing.with_requires("numpy>=2.0") +@pytest.mark.usefixtures("skip_forward_backward") +@testing.parameterize( + *( + testing.product_dict( + [ + {"shape": (3, 4), "s": None, "axes": None}, + {"shape": (3, 4), "s": (1, 5), "axes": (-2, -1)}, + {"shape": (3, 4), "s": None, "axes": (-2, -1)}, + {"shape": (3, 4), "s": None, "axes": (0,)}, + {"shape": (3, 4), "s": None, "axes": None}, + {"shape": (2, 3, 4), "s": None, "axes": None}, + {"shape": (2, 3, 4), "s": (1, 4, 10), "axes": (-3, -2, -1)}, + {"shape": (2, 3, 4), 
"s": None, "axes": (-3, -2, -1)}, + {"shape": (2, 3, 4), "s": None, "axes": (0, 1)}, + {"shape": (2, 3, 4), "s": None, "axes": None}, + {"shape": (2, 3, 4), "s": (2, 3), "axes": (0, 1, 2)}, + ], + testing.product({"norm": [None, "backward", "ortho", "forward"]}), + ) + ) +) +@pytest.mark.skip("get_fft_plan() is not supported") +class TestPlanCtxManagerRfftn: + + @pytest.fixture(autouse=True) + def skip_buggy(self): + if cupy.cuda.runtime.is_hip: + # TODO(leofang): test newer ROCm versions + if self.axes == (0, 1) and self.shape == (2, 3, 4): + pytest.skip( + "hipFFT's PlanNd for this case " + "is buggy, so Plan1d is generated " + "instead" + ) + + @nd_planning_states() + @testing.for_all_dtypes(no_complex=True) + @testing.numpy_cupy_allclose( + rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_rfftn(self, xp, dtype, enable_nd): + assert config.enable_nd_planning == enable_nd + a = testing.shaped_random(self.shape, xp, dtype) + + if xp is np: + return xp.fft.rfftn(a, s=self.s, axes=self.axes, norm=self.norm) + + from cupyx.scipy.fftpack import get_fft_plan + plan = get_fft_plan(a, self.s, self.axes, value_type="R2C") + with plan: + return xp.fft.rfftn(a, s=self.s, axes=self.axes, norm=self.norm) + @nd_planning_states() + @testing.for_all_dtypes() + @testing.numpy_cupy_allclose( + rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False + ) + def test_irfftn(self, xp, dtype, enable_nd): + assert config.enable_nd_planning == enable_nd + a = testing.shaped_random(self.shape, xp, dtype) + + if dtype == xp.float16 and xp is cupy: + pytest.xfail("XXX: np2.0: f16 dtypes differ") + + if xp is np: + return xp.fft.irfftn(a, s=self.s, axes=self.axes, norm=self.norm) + + from cupyx.scipy.fftpack import get_fft_plan + + plan = get_fft_plan(a, self.s, self.axes, value_type="C2R") + with plan: + return xp.fft.irfftn(a, s=self.s, axes=self.axes, norm=self.norm) + + # TODO(leofang): write test_rfftn_error_on_wrong_plan()? 
+ + +@testing.with_requires("numpy>=2.0") +@pytest.mark.usefixtures("skip_forward_backward") +@testing.parameterize( + *( + testing.product_dict( + [ + {"shape": (3, 4), "s": None, "axes": None}, + {"shape": (3, 4), "s": None, "axes": (-2, -1)}, + {"shape": (3, 4), "s": None, "axes": (-1, -2)}, + {"shape": (3, 4), "s": None, "axes": (0,)}, + {"shape": (3, 4), "s": None, "axes": None}, + {"shape": (2, 3, 4), "s": None, "axes": None}, + {"shape": (2, 3, 4), "s": (1, 4, 10), "axes": (-3, -2, -1)}, + {"shape": (2, 3, 4), "s": None, "axes": (-3, -2, -1)}, + {"shape": (2, 3, 4), "s": None, "axes": (-1, -2, -3)}, + {"shape": (2, 3, 4), "s": None, "axes": (0, 1)}, + {"shape": (2, 3, 4), "s": None, "axes": None}, + {"shape": (2, 3, 4, 5), "s": None, "axes": None}, + ], + testing.product({"norm": [None, "backward", "ortho", "forward"]}), + ) + ) +) +@pytest.mark.skip("default FFT function is not supported") +class TestRfftnContiguity: + + @nd_planning_states([True]) + @testing.for_float_dtypes() + def test_rfftn_orders(self, dtype, enable_nd): + for order in ["C", "F"]: + a = testing.shaped_random(self.shape, cupy, dtype) + if order == "F": + a = cupy.asfortranarray(a) + out = cupy.fft.rfftn(a, s=self.s, axes=self.axes) + + fft_func = _default_fft_func( + a, s=self.s, axes=self.axes, value_type="R2C" + ) + if fft_func is _fftn: + # nd plans have output with contiguity matching the input + assert out.flags.c_contiguous == a.flags.c_contiguous + assert out.flags.f_contiguous == a.flags.f_contiguous + else: + # 1d planning case doesn't guarantee preserved contiguity + pass + + @nd_planning_states([True]) + @testing.for_all_dtypes() + def test_ifftn_orders(self, dtype, enable_nd): + for order in ["C", "F"]: + + a = testing.shaped_random(self.shape, cupy, dtype) + if order == "F": + a = cupy.asfortranarray(a) + out = cupy.fft.irfftn(a, s=self.s, axes=self.axes) + + fft_func = _default_fft_func( + a, s=self.s, axes=self.axes, value_type="C2R" + ) + if fft_func is _fftn: + # nd 
plans have output with contiguity matching the input + assert out.flags.c_contiguous == a.flags.c_contiguous + assert out.flags.f_contiguous == a.flags.f_contiguous + else: + # 1d planning case doesn't guarantee preserved contiguity + pass + + +# @testing.with_requires("numpy>=2.0") @testing.parameterize( {"shape": (3, 4), "s": None, "axes": (), "norm": None}, {"shape": (2, 3, 4), "s": None, "axes": (), "norm": None}, ) class TestRfftnEmptyAxes: + @testing.for_all_dtypes(no_complex=True) def test_rfftn(self, dtype): for xp in (np, cupy): @@ -520,21 +1341,23 @@ def test_irfftn(self, dtype): xp.fft.irfftn(a, s=self.s, axes=self.axes, norm=self.norm) +# @testing.with_requires("numpy>=2.0") @pytest.mark.usefixtures("skip_forward_backward") @testing.parameterize( *testing.product( { "n": [None, 5, 10, 15], "shape": [(10,), (10, 10)], - "norm": [None, "backward", "ortho", "forward", ""], + "norm": [None, "backward", "ortho", "forward"], } ) ) class TestHfft: + @testing.for_all_dtypes() @testing.numpy_cupy_allclose( - rtol=1e-4, - atol=2e-6, + rtol=1e-3, + atol=1e-7, accept_error=ValueError, contiguous_check=False, type_check=has_support_aspect64(), @@ -543,14 +1366,21 @@ def test_hfft(self, xp, dtype): a = testing.shaped_random(self.shape, xp, dtype) out = xp.fft.hfft(a, n=self.n, norm=self.norm) - if xp is np and dtype in [np.float16, np.float32, np.complex64]: + if dtype == xp.float16 and xp is cupy: + # XXX: np2.0: f16 dtypes differ + out = out.astype(np.float16) + elif ( + xp is np + and np.lib.NumpyVersion(np.__version__) < "2.0.0" + and dtype == np.float32 + ): out = out.astype(np.float32) return out @testing.for_all_dtypes(no_complex=True) @testing.numpy_cupy_allclose( - rtol=1e-4, + rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False, @@ -560,39 +1390,46 @@ def test_ihfft(self, xp, dtype): a = testing.shaped_random(self.shape, xp, dtype) out = xp.fft.ihfft(a, n=self.n, norm=self.norm) - if xp is np and dtype in [np.float16, np.float32, 
np.complex64]: + if ( + xp is np + and np.lib.NumpyVersion(np.__version__) < "2.0.0" + and dtype == np.float32 + ): out = out.astype(np.complex64) return out +# @testing.with_requires("numpy>=2.0") @testing.parameterize( {"n": 1, "d": 1}, {"n": 10, "d": 0.5}, {"n": 100, "d": 2}, ) class TestFftfreq: + + @testing.for_all_dtypes() @testing.numpy_cupy_allclose( - rtol=1e-4, + rtol=1e-3, atol=1e-7, + contiguous_check=False, type_check=has_support_aspect64(), ) - def test_fftfreq(self, xp): - out = xp.fft.fftfreq(self.n, self.d) - - return out + def test_fftfreq(self, xp, dtype): + return xp.fft.fftfreq(self.n, self.d) + @testing.for_all_dtypes() @testing.numpy_cupy_allclose( - rtol=1e-4, + rtol=1e-3, atol=1e-7, + contiguous_check=False, type_check=has_support_aspect64(), ) - def test_rfftfreq(self, xp): - out = xp.fft.rfftfreq(self.n, self.d) - - return out + def test_rfftfreq(self, xp, dtype): + return xp.fft.rfftfreq(self.n, self.d) +# @testing.with_requires("numpy>=2.0") @testing.parameterize( {"shape": (5,), "axes": None}, {"shape": (5,), "axes": 0}, @@ -603,26 +1440,54 @@ def test_rfftfreq(self, xp): {"shape": (10, 10), "axes": (0, 1)}, ) class TestFftshift: + @testing.for_all_dtypes() @testing.numpy_cupy_allclose( - rtol=1e-4, + rtol=1e-3, atol=1e-7, + contiguous_check=False, type_check=has_support_aspect64(), ) def test_fftshift(self, xp, dtype): x = testing.shaped_random(self.shape, xp, dtype) - out = xp.fft.fftshift(x, self.axes) - - return out + return xp.fft.fftshift(x, self.axes) @testing.for_all_dtypes() @testing.numpy_cupy_allclose( - rtol=1e-4, + rtol=1e-3, atol=1e-7, + contiguous_check=False, type_check=has_support_aspect64(), ) def test_ifftshift(self, xp, dtype): x = testing.shaped_random(self.shape, xp, dtype) - out = xp.fft.ifftshift(x, self.axes) + return xp.fft.ifftshift(x, self.axes) - return out + +@pytest.mark.skip("no threading support") +class TestThreading: + + def test_threading1(self): + import threading + + from cupy.cuda.cufft import 
get_current_plan + + def thread_get_curr_plan(): + cupy.cuda.Device().use() + return get_current_plan() + + new_thread = threading.Thread(target=thread_get_curr_plan) + new_thread.start() + + def test_threading2(self): + import threading + + a = cupy.arange(100, dtype=cupy.complex64).reshape(10, 10) + + def thread_do_fft(): + cupy.cuda.Device().use() + b = cupy.fft.fftn(a) + return b + + new_thread = threading.Thread(target=thread_do_fft) + new_thread.start()