Updated some things for GPUs
brownbaerchen committed Sep 13, 2024
1 parent 016a784 commit f33a7e9
Showing 3 changed files with 55 additions and 7 deletions.
6 changes: 6 additions & 0 deletions pySDC/helpers/spectral_helper.py
@@ -29,11 +29,13 @@ def setup_GPU(cls):
import cupy as cp
import cupyx.scipy.sparse as sparse_lib
import cupyx.scipy.sparse.linalg as linalg
import cupyx.scipy.fft as fft_lib
from pySDC.implementations.datatype_classes.cupy_mesh import cupy_mesh

cls.xp = cp
cls.sparse_lib = sparse_lib
cls.linalg = linalg
cls.fft_lib = fft_lib

def get_Id(self):
return self.sparse_lib.eye(self.N)
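
The pattern on display here is namespace rebinding: setup_GPU swaps numpy, scipy.sparse, scipy.sparse.linalg, and now scipy.fft for their cupy counterparts, so downstream calls such as get_Id run unchanged on either backend. A minimal, CPU-runnable sketch of the idea (class and method names are illustrative, not pySDC's):

import numpy as np
import scipy.sparse as scipy_sparse
import scipy.fft as scipy_fft

class SpectralBackend:
    # CPU defaults; setup_GPU() rebinds these to the cupy equivalents
    xp = np
    sparse_lib = scipy_sparse
    fft_lib = scipy_fft

    def __init__(self, N):
        self.N = N

    @classmethod
    def setup_GPU(cls):
        # requires cupy; mirrors the imports added in this commit
        import cupy as cp
        import cupyx.scipy.sparse as sparse_lib
        import cupyx.scipy.fft as fft_lib
        cls.xp, cls.sparse_lib, cls.fft_lib = cp, sparse_lib, fft_lib

    def get_Id(self):
        # identical call on CPU and GPU, because only the namespace changes
        return self.sparse_lib.eye(self.N)

backend = SpectralBackend(4)
print(backend.get_Id().toarray())  # 4x4 identity from whichever backend is active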
@@ -1429,6 +1431,8 @@ def get_aligned(self, u, axis_in, axis_out, fft=None, forward=False, **kwargs):
"""
if self.comm is None or axis_in == axis_out:
return u.copy()
if self.comm.size == 1:
return u.copy()

fft = self.get_fft(**kwargs) if fft is None else fft

@@ -1450,6 +1454,7 @@ def get_aligned(self, u, axis_in, axis_out, fft=None, forward=False, **kwargs):

current_axis = transfer.axisB
u = arrayB

return u
elif axis_in in axisB and axis_out in axisA:
while current_axis != axis_out:
@@ -1463,6 +1468,7 @@ def get_aligned(self, u, axis_in, axis_out, fft=None, forward=False, **kwargs):

current_axis = transfer.axisA
u = arrayA

return u
else: # go the potentially slower route of not reusing transfer classes
from mpi4py_fft import newDistArray
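
The new comm.size == 1 guard gives a one-rank communicator the same cheap copy path as a serial run, instead of building transfer classes for a redistribution that would move nothing. A minimal sketch of just the guard logic, with a stub standing in for the MPI communicator:

import numpy as np

class SingleRankComm:
    # stand-in for an MPI communicator that only has one rank
    size = 1

def get_aligned_sketch(u, axis_in, axis_out, comm=None):
    # serial run, or alignment already correct: nothing to redistribute
    if comm is None or axis_in == axis_out:
        return u.copy()
    # a single rank already holds all the data, so transfers can be skipped
    if comm.size == 1:
        return u.copy()
    raise NotImplementedError('multi-rank redistribution is beyond this sketch')

u = np.arange(6.0).reshape(2, 3)
print(get_aligned_sketch(u, axis_in=0, axis_out=1, comm=SingleRankComm()))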
4 changes: 2 additions & 2 deletions pySDC/implementations/problem_classes/RayleighBenard.py
@@ -134,7 +134,7 @@ def __init__(
Nyquist_mode_index = self.axes[0].get_Nyquist_mode_index()
for component in self.components:
self.add_BC(
-    component=component, equation=component, axis=0, kind='Nyquist', line=Nyquist_mode_index, v=0
+    component=component, equation=component, axis=0, kind='Nyquist', line=int(Nyquist_mode_index), v=0
)
self.setup_BCs()
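
The int(...) cast matters on the GPU: if get_Nyquist_mode_index() is computed with an xp reduction, cupy returns a 0-d device array rather than a Python integer, which can confuse code expecting a plain index; the cast also performs the device-to-host copy once, up front. A CPU illustration of the same effect (the index computation here is illustrative, not pySDC's):

import numpy as np

# reductions return array scalars (0-d device arrays under cupy), not Python ints
Nyquist_mode_index = np.argmin(np.abs(np.fft.fftfreq(8) + 0.5))
print(type(Nyquist_mode_index))  # a numpy integer here; a 0-d array under cupy

line = int(Nyquist_mode_index)   # explicit cast yields a host-side Python int
print(type(line), line)          # <class 'int'> 4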

@@ -434,7 +434,7 @@ def compute_max_step_size(P, u):

if hasattr(P, 'comm'):
max_step_size = P.comm.allreduce(max_step_size, op=MPI.MIN)
-    return max_step_size
+    return float(max_step_size)

def get_new_step_size(self, controller, step, **kwargs):
if not CheckConvergence.check_convergence(step):
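
The float(...) cast in compute_max_step_size has the same motivation as the int(...) cast above: under cupy, the reduction that produces max_step_size yields a 0-d device array, and the step-size logic downstream is safer with a host-side Python float. A CPU sketch of the cast (values are illustrative):

import numpy as np

dx = np.array([0.1, 0.05, 0.2])
cfl = 0.8

max_step_size = cfl * dx.min()        # numpy scalar here; 0-d array under cupy
max_step_size = float(max_step_size)  # a plain Python float on CPU and GPU alike
print(type(max_step_size))            # <class 'float'>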
52 changes: 47 additions & 5 deletions pySDC/implementations/problem_classes/generic_spectral.py
@@ -18,8 +18,6 @@ def setup_GPU(cls):
from pySDC.implementations.datatype_classes.cupy_mesh import cupy_mesh, imex_cupy_mesh
from pySDC.implementations.datatype_classes.mesh import mesh, imex_mesh

cls.xp = cp

cls.dtype_u = cupy_mesh

GPU_versions = {
@@ -183,8 +181,6 @@ def solve_system(self, rhs, dt, u0=None, *args, skip_itransform=False, **kwargs)
Note that in implicit Euler, the right hand side will be composed of the initial conditions. We don't want that in lines that don't depend on time. Therefore, we multiply the right hand side by the mass matrix. This means you can only do algebraic conditions that add up to zero. But you can easily overload this function with something more generic if needed.
"""
if dt == 0:
return rhs

sp = self.spectral.sparse_lib
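
The docstring's mass-matrix remark can be made concrete with a toy system: rows of the mass matrix M that are zero encode algebraic conditions, and premultiplying the right-hand side by M zeroes it in exactly those rows, so the constraint holds independently of the initial conditions. A small worked sketch (a hand-rolled dense example, not the pySDC implementation):

import numpy as np

# implicit Euler for M u_t = L u: solve (M - dt L) u = M rhs.
# The zero row of M makes the last equation purely algebraic (u2 = u0 + u1),
# and M @ rhs wipes out rhs[2], so the condition adds up to zero as required.
M = np.diag([1.0, 1.0, 0.0])
L = np.array([[-1.0, 0.0, 0.0],
              [0.0, -2.0, 0.0],
              [1.0, 1.0, -1.0]])
dt = 0.1
rhs = np.array([1.0, 2.0, 5.0])  # rhs[2] is ignored thanks to M @ rhs

u = np.linalg.solve(M - dt * L, M @ rhs)
print(u, u[2] - (u[0] + u[1]))   # constraint residual is (numerically) zero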

@@ -249,7 +245,6 @@ def solve_system(self, rhs, dt, u0=None, *args, skip_itransform=False, **kwargs)
if self.spectral_space:
return sol_hat
else:

sol = self.spectral.u_init
sol[:] = self.spectral.itransform(sol_hat).real

@@ -313,3 +308,50 @@ def compute_residual_DAE(self, stage=''):
L.status.updated = False

return None


def compute_residual_DAE_MPI(self, stage=None):
"""
Computation of the residual using the collocation matrix Q
Args:
stage (str): The current stage of the step the level belongs to
"""
from mpi4py import MPI

L = self.level

# Check if we want to skip the residual computation to gain performance
# Keep in mind that skipping any residual computation is likely to give incorrect outputs of the residual!
if stage in self.params.skip_residual_computation:
L.status.residual = 0.0 if L.status.residual is None else L.status.residual
return None

# compute the residual for each node

# build QF(u)
res = self.integrate(last_only=L.params.residual_type[:4] == 'last')
mask = L.prob.diff_mask
res[mask] += L.u[0][mask] - L.u[self.rank + 1][mask]
# add tau if associated
if L.tau[self.rank] is not None:
res += L.tau[self.rank]
# use abs function from data type here
res_norm = abs(res)

# find maximal residual over the nodes
if L.params.residual_type == 'full_abs':
L.status.residual = self.comm.allreduce(res_norm, op=MPI.MAX)
elif L.params.residual_type == 'last_abs':
L.status.residual = self.comm.bcast(res_norm, root=self.comm.size - 1)
elif L.params.residual_type == 'full_rel':
L.status.residual = self.comm.allreduce(res_norm / abs(L.u[0]), op=MPI.MAX)
elif L.params.residual_type == 'last_rel':
L.status.residual = self.comm.bcast(res_norm / abs(L.u[0]), root=self.comm.size - 1)
else:
raise NotImplementedError(f'residual type \"{L.params.residual_type}\" not implemented!')

# indicate that the residual has seen the new values
L.status.updated = False

return None
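
The four residual_type branches differ only in which node's norm survives (a MAX over all nodes via allreduce, or the last node's value broadcast from the last rank) and whether it is scaled by the norm of the initial value. A serial sketch of the same selection, with the MPI collectives replaced by operations over a list of per-node norms:

def select_residual(res_norms, u0_norm, residual_type):
    # res_norms: one residual norm per collocation node (in rank order)
    if residual_type == 'full_abs':
        return max(res_norms)                       # allreduce(MAX) over nodes
    elif residual_type == 'last_abs':
        return res_norms[-1]                        # bcast from the last rank
    elif residual_type == 'full_rel':
        return max(r / u0_norm for r in res_norms)
    elif residual_type == 'last_rel':
        return res_norms[-1] / u0_norm
    raise NotImplementedError(f'residual type "{residual_type}" not implemented!')

print(select_residual([1e-3, 5e-4, 2e-6], 2.0, 'last_rel'))  # 1e-06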
