diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 4fcbc91..50b4cb9 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -34,4 +34,4 @@ jobs:
       - name: Pytest
         run: |
-          pytest tests/ --cov
+          pytest tests/ --cov -k "not cuda"
diff --git a/Jenkinsfile b/Jenkinsfile
new file mode 100644
index 0000000..c030e39
--- /dev/null
+++ b/Jenkinsfile
@@ -0,0 +1,91 @@
+pipeline {
+    agent none
+    options {
+        disableConcurrentBuilds()
+        buildDiscarder(logRotator(numToKeepStr: '8', daysToKeepStr: '20'))
+        timeout(time: 1, unit: 'HOURS')
+    }
+    stages {
+        stage('CUDA Tests') {
+            agent {
+                dockerfile {
+                    filename 'ci/docker/Dockerfile-cuda11.8'
+                    args '--gpus 2'
+                    label 'docker && v100'
+                }
+            }
+            environment {
+                HOME = "$WORKSPACE"
+                PYBIN = "/opt/python/cp39-cp39/bin"
+                LIBRARY_PATH = "$WORKSPACE/finufft/build"
+                LD_LIBRARY_PATH = "$WORKSPACE/finufft/build"
+            }
+            steps {
+
+                // TODO - reconsider install strategy once finufft/cufinufft 2.2 is released
+                checkout scmGit(branches: [[name: '*/master']],
+                                extensions: [cloneOption(noTags: true, reference: '', shallow: true),
+                                             [$class: 'RelativeTargetDirectory', relativeTargetDir: 'finufft'],
+                                             cleanAfterCheckout()],
+                                userRemoteConfigs: [[url: 'https://github.com/flatironinstitute/finufft']])
+
+                sh '''#!/bin/bash -ex
+                    nvidia-smi
+                '''
+                sh '''#!/bin/bash -ex
+                    echo $HOME
+                    ls
+                '''
+                sh '''#!/bin/bash -ex
+                    cd finufft
+                    # v100 cuda arch
+                    cuda_arch="70"
+
+                    cmake -B build . -DFINUFFT_USE_CUDA=ON \
+                                     -DFINUFFT_USE_CPU=OFF \
+                                     -DFINUFFT_BUILD_TESTS=OFF \
+                                     -DCMAKE_CUDA_ARCHITECTURES="$cuda_arch" \
+                                     -DBUILD_TESTING=ON
+                    cd build
+                    make -j4
+                '''
+
+                sh '${PYBIN}/python3 -m venv $HOME'
+                sh '''#!/bin/bash -ex
+                    source $HOME/bin/activate
+                    python3 -m pip install --upgrade pip
+                    # we could also move pytorch install inside docker
+                    python3 -m pip install "torch~=2.1.0" --index-url https://download.pytorch.org/whl/cu118
+                    python3 -m pip install finufft/python/cufinufft
+
+                    python3 -m pip install -e .[dev]
+
+                    python3 -m pytest -k "cuda" tests/ --cov -v
+                '''
+            }
+        }
+    }
+    post {
+        failure {
+            emailext subject: '$PROJECT_NAME - Build #$BUILD_NUMBER - $BUILD_STATUS',
+                body: '''$PROJECT_NAME - Build #$BUILD_NUMBER - $BUILD_STATUS
+
+Check console output at $BUILD_URL to view full results.
+
+Building $BRANCH_NAME for $CAUSE
+$JOB_DESCRIPTION
+
+Changes:
+$CHANGES
+
+End of build log:
+${BUILD_LOG,maxLines=200}
+''',
+                recipientProviders: [
+                    [$class: 'DevelopersRecipientProvider'],
+                ],
+                replyTo: '$DEFAULT_REPLYTO',
+                to: 'bward@flatironinstitute.org'
+        }
+    }
+}
diff --git a/ci/docker/Dockerfile-cuda11.8 b/ci/docker/Dockerfile-cuda11.8
new file mode 100644
index 0000000..ce622f7
--- /dev/null
+++ b/ci/docker/Dockerfile-cuda11.8
@@ -0,0 +1,58 @@
+# Based on https://github.com/flatironinstitute/finufft/blob/master/tools/cufinufft/docker/cuda11.2/Dockerfile-x86_64
+
+FROM quay.io/pypa/manylinux2014_x86_64
+LABEL maintainer "Brian Ward"
+
+ENV CUDA_MAJOR 11
+ENV CUDA_MINOR 8
+ENV CUDA_DASH_VERSION ${CUDA_MAJOR}-${CUDA_MINOR}
+ENV CUDA_DOT_VERSION ${CUDA_MAJOR}.${CUDA_MINOR}
+
+# ---- The following block adds layers for CUDA --- #
+# base
+RUN NVIDIA_GPGKEY_SUM=d0664fbbdb8c32356d45de36c5984617217b2d0bef41b93ccecd326ba3b80c87 && \
+    curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/D42D0685.pub | sed '/^Version/d' > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \
+    echo "$NVIDIA_GPGKEY_SUM /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA" | sha256sum -c --strict -
+
+COPY ci/docker/cuda.repo /etc/yum.repos.d/cuda.repo
+
+# For libraries in the cuda-compat-* package: https://docs.nvidia.com/cuda/eula/index.html#attachment-a
+RUN yum install -y \
+    cuda-cudart-${CUDA_DASH_VERSION} \
+    cuda-compat-${CUDA_DASH_VERSION} && \
+    ln -s cuda-${CUDA_DOT_VERSION} /usr/local/cuda
+
+# nvidia-docker 1.0
+RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
+    echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf
+
+ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}
+ENV LD_LIBRARY_PATH ${LD_LIBRARY_PATH}:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
+
+# nvidia-container-runtime
+ENV NVIDIA_VISIBLE_DEVICES all
+ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
+ENV NVIDIA_REQUIRE_CUDA "cuda>=${CUDA_DOT_VERSION} brand=tesla,driver>=418,driver<419 brand=tesla,driver>=440,driver<441"
+
+# runtime
+RUN yum install -y \
+    cuda-libraries-${CUDA_DASH_VERSION} \
+    cuda-nvtx-${CUDA_DASH_VERSION} \
+    cuda-cudart-devel-${CUDA_DASH_VERSION} \
+    cuda-libraries-devel-${CUDA_DASH_VERSION} \
+    cuda-nvprof-${CUDA_DASH_VERSION} \
+    cuda-nvcc-${CUDA_DASH_VERSION}
+
+ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs
+
+# /CUDA #
+
+# CUDA 11 doesn't work on gcc/g++ newer than v9
+RUN yum install -y \
+    devtoolset-9-gcc \
+    devtoolset-9-gcc-c++ \
+    cmake && \
+    rm -rf /var/cache/yum/*
+
+ENV PATH /opt/rh/devtoolset-9/root/usr/bin:${PATH}
+
diff --git a/ci/docker/cuda.repo b/ci/docker/cuda.repo
new file mode 100644
index 0000000..ba2cba6
--- /dev/null
+++ b/ci/docker/cuda.repo
@@ -0,0 +1,6 @@
+[cuda]
+name=cuda
+baseurl=https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64
+enabled=1
+gpgcheck=1
+gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA
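A minimal sanity check for the resulting image (a sketch, not part of this patch, assuming the cu118 torch wheel installed in the Jenkins stage) is to confirm that PyTorch can actually see the GPUs passed through with `--gpus 2`:

```python
import torch

# Fails fast if the driver shim from cuda-compat / nvidia-container-runtime
# is not visible inside the container.
assert torch.cuda.is_available(), "CUDA driver/runtime not visible"

print(torch.version.cuda)             # expect "11.8" for the cu118 wheel
print(torch.cuda.device_count())      # expect 2 given `args '--gpus 2'`
print(torch.cuda.get_device_name(0))  # a V100 on the labeled Jenkins node
```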
" + "Install either finufft or cufinufft and ensure they are importable." + ) + import pytorch_finufft._err as err ############################################################################### @@ -1595,27 +1614,40 @@ def backward( ) - - - ############################################################################### # Consolidated forward function for all 1D, 2D, and 3D problems for nufft type 1 ############################################################################### -def get_nufft_func(dim, nufft_type): - return getattr(finufft, f"nufft{dim}d{nufft_type}") + +def get_nufft_func(dim, nufft_type, device_type): + if device_type == "cuda": + return getattr(cufinufft, f"nufft{dim}d{nufft_type}") + + # CPU needs extra work to go to/from torch and numpy + finufft_func = getattr(finufft, f"nufft{dim}d{nufft_type}") + + def f(*args, **kwargs): + new_args = [arg for arg in args] + for i in range(len(new_args)): + if isinstance(new_args[i], torch.Tensor): + new_args[i] = new_args[i].data.numpy() + + return torch.from_numpy(finufft_func(*new_args, **kwargs)) + + return f class finufft_type1(torch.autograd.Function): @staticmethod def forward( - ctx: Any, - points: torch.Tensor, - values: torch.Tensor, - output_shape: Union[int, tuple[int, int], tuple[int, int, int]], - out: Optional[torch.Tensor]=None, - fftshift: bool=False, - finufftkwargs: dict[str, Union[int, float]]=None): + ctx: Any, + points: torch.Tensor, + values: torch.Tensor, + output_shape: Union[int, Tuple[int, int], Tuple[int, int, int]], + out: Optional[torch.Tensor] = None, + fftshift: bool = False, + finufftkwargs: dict[str, Union[int, float]] = None, + ): """ Evaluates the Type 1 NUFFT on the inputs. @@ -1626,8 +1658,13 @@ def forward( # All this requires is a check on the out array to make sure it is the # correct shape. - err._type1_checks(points, values, output_shape) # revisit these error checks to take into account the shape of points instead of passing them separately - # ^ make sure these checks check for consistency between output shape and len(points) + # TODO: + # revisit these error checks to take into account the shape of points + # instead of passing them separately + # make sure these checks check for consistency between output shape and + # len(points) + # Also need device checks + err._type1_checks(points, values, output_shape) if finufftkwargs is None: finufftkwargs = dict() @@ -1640,7 +1677,8 @@ def forward( # to note instead that there is a conflict in fftshift if _mode_ordering != 1: raise ValueError( - "Double specification of ordering; only one of fftshift and modeord should be provided" + "Double specification of ordering; only one of fftshift and " + "modeord should be provided" ) _mode_ordering = 0 @@ -1654,38 +1692,34 @@ def forward( ndim = points.shape[0] assert len(output_shape) == ndim - nufft_func = get_nufft_func(ndim, 1) - finufft_out = torch.from_numpy( - nufft_func( - *points.data.numpy(), - values.data.numpy(), - output_shape, - modeord=_mode_ordering, - isign=_i_sign, - **finufftkwargs, - ) + nufft_func = get_nufft_func(ndim, 1, points.device.type) + finufft_out = nufft_func( + *points, values, output_shape, isign=_i_sign, **finufftkwargs ) + # because modeord is missing from cufinufft + if _mode_ordering: + finufft_out = torch.fft.ifftshift(finufft_out) return finufft_out @staticmethod def backward( ctx: Any, grad_output: torch.Tensor - ) -> tuple[Union[torch.Tensor, None], ...]: + ) -> Tuple[Union[torch.Tensor, None], ...]: """ - Implements derivatives wrt. 
diff --git a/tests/test_1d/test_forward_1d.py b/tests/test_1d/test_forward_1d.py
index 5379927..eec8a9b 100644
--- a/tests/test_1d/test_forward_1d.py
+++ b/tests/test_1d/test_forward_1d.py
@@ -66,15 +66,14 @@ def test_1d_t1_forward_CPU(values: torch.Tensor) -> None:
         torch.linalg.norm(finufft1D1_out - against_scipy) / N**2
     ) == pytest.approx(0, abs=1e-06)
 
-
     abs_errors = torch.abs(finufft1D1_out - against_torch)
     l_inf_error = abs_errors.max()
     l_2_error = torch.sqrt(torch.sum(abs_errors**2))
     l_1_error = torch.sum(abs_errors)
 
-    assert l_inf_error < 3.5e-3 * N ** .6
-    assert l_2_error < 7.5e-4 * N ** 1.1
-    assert l_1_error < 5e-4 * N ** 1.6
+    assert l_inf_error < 3.5e-3 * N**0.6
+    assert l_2_error < 7.5e-4 * N**1.1
+    assert l_1_error < 5e-4 * N**1.6
 
 
 @pytest.mark.parametrize("targets", cases)
@@ -106,17 +105,16 @@ def test_1d_t2_forward_CPU(targets: torch.Tensor):
     )
 
 
-@pytest.mark.parametrize("N", Ns)
-def test_t1_forward_CPU(N: int) -> None:
+def check_t1_forward(N: int, device: str) -> None:
     """
     Tests against implementations of the FFT by setting up a uniform grid
     over which to call FINUFFT through the API.
     """
     g = np.mgrid[:N] * 2 * np.pi / N
     g.shape = 1, -1
-    points = torch.from_numpy(g.reshape(1, -1))
+    points = torch.from_numpy(g.reshape(1, -1)).to(device)
 
-    values = torch.randn(*points[0].shape, dtype=torch.complex128)
+    values = torch.randn(*points[0].shape, dtype=torch.complex128).to(device)
 
     print("N is " + str(N))
     print("shape of points is " + str(points.shape))
@@ -136,10 +134,19 @@ def test_t1_forward_CPU(N: int) -> None:
     l_1_error = torch.sum(abs_errors)
 
     assert l_inf_error < 4.5e-5 * N
-    assert l_2_error < 1e-5 * N ** 2
-    assert l_1_error < 1e-5 * N ** 3
+    assert l_2_error < 1e-5 * N**2
+    assert l_1_error < 1e-5 * N**3
+
+
+@pytest.mark.parametrize("N", Ns)
+def test_t1_forward_CPU(N: int) -> None:
+    check_t1_forward(N, "cpu")
+
+
+@pytest.mark.parametrize("N", Ns)
+def test_t1_forward_cuda(N: int) -> None:
+    check_t1_forward(N, "cuda")
 
 
 # @pytest.mark.parametrize("values", cases)
 # def test_1d_t3_forward_CPU(values: torch.Tensor) -> None:
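The CUDA variants here are selected purely by test name (`-k "cuda"` on Jenkins, `-k "not cuda"` in GitHub Actions). A hypothetical alternative, sketched below and not part of this patch, would reuse the file's `Ns` and `check_t1_forward` but skip based on the runtime environment instead:

```python
import pytest
import torch

# Hypothetical marker: skip CUDA tests on hosts without a visible GPU
# rather than maintaining -k name filters in each CI configuration.
requires_cuda = pytest.mark.skipif(
    not torch.cuda.is_available(), reason="no CUDA device available"
)


@requires_cuda
@pytest.mark.parametrize("N", Ns)
def test_t1_forward_cuda_guarded(N: int) -> None:
    check_t1_forward(N, "cuda")
```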
diff --git a/tests/test_2d/test_backward_2d.py b/tests/test_2d/test_backward_2d.py
index 6a9b707..ddde4cf 100644
--- a/tests/test_2d/test_backward_2d.py
+++ b/tests/test_2d/test_backward_2d.py
@@ -5,8 +5,6 @@
 
 import pytorch_finufft
 
-from functools import partial
-
 torch.set_default_tensor_type(torch.DoubleTensor)
 torch.set_default_dtype(torch.float64)
 torch.manual_seed(0)
@@ -100,48 +98,68 @@ def test_t1_backward_CPU_values(
     assert gradcheck(apply_finufft2d1(modifier, fftshift, isign), inputs)
 
 
-@pytest.mark.parametrize("N", Ns)
-@pytest.mark.parametrize("modifier", length_modifiers)
-@pytest.mark.parametrize("fftshift", [False, True])
-@pytest.mark.parametrize("isign", [-1, 1])
-def test_t1_consolidated_backward_CPU_values(N: int, modifier: int, fftshift: bool, isign: int) -> None:
-
-    points = torch.rand((2, N), dtype=torch.float64) * 2 * np.pi
-    values = torch.randn(N, dtype=torch.complex128)
+def check_t1_backward(
+    N: int,
+    modifier: int,
+    fftshift: bool,
+    isign: int,
+    device: str,
+    points_or_values: bool,
+) -> None:
+    points = torch.rand((2, N), dtype=torch.float64).to(device) * 2 * np.pi
+    values = torch.randn(N, dtype=torch.complex128).to(device)
 
-    points.requires_grad = False
-    values.requires_grad = True
+    points.requires_grad = points_or_values
+    values.requires_grad = not points_or_values
 
     inputs = (points, values)
 
     def func(points, values):
         return pytorch_finufft.functional.finufft_type1.apply(
-            points, values, (N,N + modifier), None, fftshift, dict(isign=isign)
+            points, values, (N, N + modifier), None, fftshift, dict(isign=isign)
         )
 
-    assert gradcheck(func, inputs)
+    assert gradcheck(func, inputs, atol=1e-5 * N)
 
 
 @pytest.mark.parametrize("N", Ns)
 @pytest.mark.parametrize("modifier", length_modifiers)
 @pytest.mark.parametrize("fftshift", [False, True])
 @pytest.mark.parametrize("isign", [-1, 1])
-def test_t1_consolidated_backward_CPU_points(N: int, modifier: int, fftshift: bool, isign: int) -> None:
+def test_t1_consolidated_backward_CPU_points(
+    N: int, modifier: int, fftshift: bool, isign: int
+) -> None:
+    check_t1_backward(N, modifier, fftshift, isign, "cpu", True)
 
-    points = torch.rand((2, N), dtype=torch.float64) * 2 * np.pi
-    values = torch.randn(N, dtype=torch.complex128)
 
-    points.requires_grad = True
-    values.requires_grad = False
+@pytest.mark.parametrize("N", Ns)
+@pytest.mark.parametrize("modifier", length_modifiers)
+@pytest.mark.parametrize("fftshift", [False, True])
+@pytest.mark.parametrize("isign", [-1, 1])
+def test_t1_consolidated_backward_CPU_values(
+    N: int, modifier: int, fftshift: bool, isign: int
+) -> None:
+    check_t1_backward(N, modifier, fftshift, isign, "cpu", False)
 
-    inputs = (points, values)
 
-    def func(points, values):
-        return pytorch_finufft.functional.finufft_type1.apply(
-            points, values, (N,N + modifier), None, fftshift, dict(isign=isign)
-        )
+@pytest.mark.parametrize("N", Ns)
+@pytest.mark.parametrize("modifier", length_modifiers)
+@pytest.mark.parametrize("fftshift", [False, True])
+@pytest.mark.parametrize("isign", [-1, 1])
+def test_t1_consolidated_backward_cuda_values(
+    N: int, modifier: int, fftshift: bool, isign: int
+) -> None:
+    check_t1_backward(N, modifier, fftshift, isign, "cuda", False)
 
-    assert gradcheck(func, inputs, atol=1e-5 * N)
+
+@pytest.mark.parametrize("N", Ns)
+@pytest.mark.parametrize("modifier", length_modifiers)
+@pytest.mark.parametrize("fftshift", [False, True])
+@pytest.mark.parametrize("isign", [-1, 1])
+def test_t1_consolidated_backward_cuda_points(
+    N: int, modifier: int, fftshift: bool, isign: int
+) -> None:
+    check_t1_backward(N, modifier, fftshift, isign, "cuda", True)
 
 
 @pytest.mark.parametrize("N", Ns)
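`check_t1_backward` funnels every case through `torch.autograd.gradcheck`, which compares the analytic gradients from `finufft_type1.backward` against finite differences in float64. A standalone sketch of the values case, using the same `atol` scaling as the helper above:

```python
import numpy as np
import torch
from torch.autograd import gradcheck

import pytorch_finufft

N = 10
points = torch.rand((2, N), dtype=torch.float64) * 2 * np.pi
values = torch.randn(N, dtype=torch.complex128, requires_grad=True)


def func(values: torch.Tensor) -> torch.Tensor:
    return pytorch_finufft.functional.finufft_type1.apply(
        points, values, (N, N), None, False, None
    )


# gradcheck perturbs `values` and checks backward() against finite differences.
assert gradcheck(func, (values,), atol=1e-5 * N)
```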
diff --git a/tests/test_2d/test_forward_2d.py b/tests/test_2d/test_forward_2d.py
index 1dda568..0ae6d85 100644
--- a/tests/test_2d/test_forward_2d.py
+++ b/tests/test_2d/test_forward_2d.py
@@ -1,10 +1,12 @@
 import numpy as np
 import pytest
 import torch
-torch.manual_seed(0)
 
 import pytorch_finufft
 
+torch.manual_seed(0)
+
+
 # Case generation
 Ns = [
     10,
@@ -52,8 +54,8 @@ def test_2d_t1_forward_CPU(N: int) -> None:
     l_1_error = torch.sum(abs_errors)
 
     assert l_inf_error < 5e-5 * N
-    assert l_2_error < 1e-5 * N ** 2
-    assert l_1_error < 1e-5 * N ** 3
+    assert l_2_error < 1e-5 * N**2
+    assert l_1_error < 1e-5 * N**3
 
 
 @pytest.mark.parametrize("N", Ns)
@@ -102,8 +104,8 @@ def test_2d_t2_forward_CPU(N: int) -> None:
     l_1_error = torch.sum(abs_errors)
 
     assert l_inf_error < 1e-5 * N
-    assert l_2_error < 1e-5 * N ** 2
-    assert l_1_error < 1e-5 * N ** 3
+    assert l_2_error < 1e-5 * N**2
+    assert l_1_error < 1e-5 * N**3
 
 
 # @pytest.mark.parametrize("N", Ns)
@@ -122,16 +124,15 @@ def test_2d_t2_forward_CPU(N: int) -> None:
 #     pass
 
 
-@pytest.mark.parametrize("N", Ns)
-def test_t1_forward_CPU(N: int) -> None:
+def check_t1_forward(N: int, device: str) -> None:
     """
     Tests against implementations of the FFT by setting up a uniform grid
     over which to call FINUFFT through the API.
     """
     g = np.mgrid[:N, :N] * 2 * np.pi / N
-    points = torch.from_numpy(g.reshape(2, -1))
+    points = torch.from_numpy(g.reshape(2, -1)).to(device)
 
-    values = torch.randn(*points[0].shape, dtype=torch.complex128)
+    values = torch.randn(*points[0].shape, dtype=torch.complex128).to(device)
 
     print("N is " + str(N))
     print("shape of points is " + str(points.shape))
@@ -151,6 +152,15 @@ def test_t1_forward_CPU(N: int) -> None:
     l_1_error = torch.sum(abs_errors)
 
     assert l_inf_error < 4.5e-5 * N
-    assert l_2_error < 1e-5 * N ** 2
-    assert l_1_error < 1e-5 * N ** 3
+    assert l_2_error < 1e-5 * N**2
+    assert l_1_error < 1e-5 * N**3
+
+
+@pytest.mark.parametrize("N", Ns)
+def test_t1_forward_CPU(N: int) -> None:
+    check_t1_forward(N, "cpu")
+
+
+@pytest.mark.parametrize("N", Ns)
+def test_t1_forward_cuda(N: int) -> None:
+    check_t1_forward(N, "cuda")
""" g = np.mgrid[:N, :N] * 2 * np.pi / N - points = torch.from_numpy(g.reshape(2, -1)) + points = torch.from_numpy(g.reshape(2, -1)).to(device) - values = torch.randn(*points[0].shape, dtype=torch.complex128) + values = torch.randn(*points[0].shape, dtype=torch.complex128).to(device) print("N is " + str(N)) print("shape of points is " + str(points.shape)) @@ -151,6 +152,15 @@ def test_t1_forward_CPU(N: int) -> None: l_1_error = torch.sum(abs_errors) assert l_inf_error < 4.5e-5 * N - assert l_2_error < 1e-5 * N ** 2 - assert l_1_error < 1e-5 * N ** 3 + assert l_2_error < 1e-5 * N**2 + assert l_1_error < 1e-5 * N**3 + +@pytest.mark.parametrize("N", Ns) +def test_t1_forward_CPU(N: int) -> None: + check_t1_forward(N, "cpu") + + +@pytest.mark.parametrize("N", Ns) +def test_t1_forward_cuda(N: int) -> None: + check_t1_forward(N, "cuda") diff --git a/tests/test_3d/test_forward_3d.py b/tests/test_3d/test_forward_3d.py index 45484aa..524e9a6 100644 --- a/tests/test_3d/test_forward_3d.py +++ b/tests/test_3d/test_forward_3d.py @@ -1,10 +1,12 @@ import numpy as np import pytest import torch -torch.manual_seed(0) import pytorch_finufft +torch.manual_seed(0) + + # Case generation Ns = [ 5, @@ -45,10 +47,9 @@ def test_3d_t1_forward_CPU(N: int) -> None: l_2_error = torch.sqrt(torch.sum(abs_errors**2)) l_1_error = torch.sum(abs_errors) - assert l_inf_error < 2e-5 * N ** 1.5 - assert l_2_error < 1e-5 * N ** 3 - assert l_1_error < 1e-5 * N ** 4.5 - + assert l_inf_error < 2e-5 * N**1.5 + assert l_2_error < 1e-5 * N**3 + assert l_1_error < 1e-5 * N**4.5 @pytest.mark.parametrize("N", Ns) @@ -79,21 +80,20 @@ def test_3d_t2_forward_CPU(N: int) -> None: l_2_error = torch.sqrt(torch.sum(abs_errors**2)) l_1_error = torch.sum(abs_errors) - assert l_inf_error < 1e-5 * N ** 1.5 - assert l_2_error < 1e-5 * N ** 3 - assert l_1_error < 1e-5 * N ** 4.5 + assert l_inf_error < 1e-5 * N**1.5 + assert l_2_error < 1e-5 * N**3 + assert l_1_error < 1e-5 * N**4.5 -@pytest.mark.parametrize("N", Ns) -def test_t1_forward_CPU(N: int) -> None: +def check_t1_forward(N: int, device: str) -> None: """ Tests against implementations of the FFT by setting up a uniform grid over which to call FINUFFT through the API. """ g = np.mgrid[:N, :N, :N] * 2 * np.pi / N - points = torch.from_numpy(g.reshape(3, -1)) + points = torch.from_numpy(g.reshape(3, -1)).to(device) - values = torch.randn(*points[0].shape, dtype=torch.complex128) + values = torch.randn(*points[0].shape, dtype=torch.complex128).to(device) print("N is " + str(N)) print("shape of points is " + str(points.shape)) @@ -112,6 +112,16 @@ def test_t1_forward_CPU(N: int) -> None: l_2_error = torch.sqrt(torch.sum(abs_errors**2)) l_1_error = torch.sum(abs_errors) - assert l_inf_error < 1.5e-5 * N ** 1.5 - assert l_2_error < 1e-5 * N ** 3 - assert l_1_error < 1e-5 * N ** 4.5 \ No newline at end of file + assert l_inf_error < 1.5e-5 * N**1.5 + assert l_2_error < 1e-5 * N**3 + assert l_1_error < 1e-5 * N**4.5 + + +@pytest.mark.parametrize("N", Ns) +def test_t1_forward_CPU(N: int) -> None: + check_t1_forward(N, "cpu") + + +@pytest.mark.parametrize("N", Ns) +def test_t1_forward_cuda(N: int) -> None: + check_t1_forward(N, "cuda")