Updated some things for GPUs
brownbaerchen committed Sep 13, 2024
1 parent 016a784 commit f33a7e9
Showing 3 changed files with 55 additions and 7 deletions.
6 changes: 6 additions & 0 deletions pySDC/helpers/spectral_helper.py
@@ -29,11 +29,13 @@ def setup_GPU(cls):
import cupy as cp
import cupyx.scipy.sparse as sparse_lib
import cupyx.scipy.sparse.linalg as linalg
import cupyx.scipy.fft as fft_lib
from pySDC.implementations.datatype_classes.cupy_mesh import cupy_mesh

cls.xp = cp
cls.sparse_lib = sparse_lib
cls.linalg = linalg
cls.fft_lib = fft_lib

def get_Id(self):
return self.sparse_lib.eye(self.N)
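
The pattern on display here is namespace rebinding: setup_GPU swaps numpy, scipy.sparse, scipy.sparse.linalg, and now scipy.fft for their cupy counterparts, so downstream calls such as get_Id run unchanged on either backend. A minimal, CPU-runnable sketch of the idea (class and method names are illustrative, not pySDC's):

import numpy as np
import scipy.sparse as scipy_sparse
import scipy.fft as scipy_fft

class SpectralBackend:
    # CPU defaults; setup_GPU() rebinds these to the cupy equivalents
    xp = np
    sparse_lib = scipy_sparse
    fft_lib = scipy_fft

    def __init__(self, N):
        self.N = N

    @classmethod
    def setup_GPU(cls):
        # requires cupy; mirrors the imports added in this commit
        import cupy as cp
        import cupyx.scipy.sparse as sparse_lib
        import cupyx.scipy.fft as fft_lib
        cls.xp, cls.sparse_lib, cls.fft_lib = cp, sparse_lib, fft_lib

    def get_Id(self):
        # identical call on CPU and GPU, because only the namespace changes
        return self.sparse_lib.eye(self.N)

backend = SpectralBackend(4)
print(backend.get_Id().toarray())  # 4x4 identity from whichever backend is active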
@@ -1429,6 +1431,8 @@ def get_aligned(self, u, axis_in, axis_out, fft=None, forward=False, **kwargs):
"""
if self.comm is None or axis_in == axis_out:
return u.copy()
if self.comm.size == 1:
return u.copy()

fft = self.get_fft(**kwargs) if fft is None else fft

@@ -1450,6 +1454,7 @@ def get_aligned(self, u, axis_in, axis_out, fft=None, forward=False, **kwargs):

current_axis = transfer.axisB
u = arrayB

return u
elif axis_in in axisB and axis_out in axisA:
while current_axis != axis_out:
@@ -1463,6 +1468,7 @@ def get_aligned(self, u, axis_in, axis_out, fft=None, forward=False, **kwargs):

current_axis = transfer.axisA
u = arrayA

return u
else: # go the potentially slower route of not reusing transfer classes
from mpi4py_fft import newDistArray
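
The new comm.size == 1 guard gives a one-rank communicator the same cheap copy path as a serial run, instead of building transfer classes for a redistribution that would move nothing. A minimal sketch of just the guard logic, with a stub standing in for the MPI communicator:

import numpy as np

class SingleRankComm:
    # stand-in for an MPI communicator that only has one rank
    size = 1

def get_aligned_sketch(u, axis_in, axis_out, comm=None):
    # serial run, or alignment already correct: nothing to redistribute
    if comm is None or axis_in == axis_out:
        return u.copy()
    # a single rank already holds all the data, so transfers can be skipped
    if comm.size == 1:
        return u.copy()
    raise NotImplementedError('multi-rank redistribution is beyond this sketch')

u = np.arange(6.0).reshape(2, 3)
print(get_aligned_sketch(u, axis_in=0, axis_out=1, comm=SingleRankComm()))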
4 changes: 2 additions & 2 deletions pySDC/implementations/problem_classes/RayleighBenard.py
@@ -134,7 +134,7 @@ def __init__(
Nyquist_mode_index = self.axes[0].get_Nyquist_mode_index()
for component in self.components:
self.add_BC(
-    component=component, equation=component, axis=0, kind='Nyquist', line=Nyquist_mode_index, v=0
+    component=component, equation=component, axis=0, kind='Nyquist', line=int(Nyquist_mode_index), v=0
)
self.setup_BCs()
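
The int(...) cast matters on the GPU: if get_Nyquist_mode_index() is computed with an xp reduction, cupy returns a 0-d device array rather than a Python integer, which can confuse code expecting a plain index; the cast also performs the device-to-host copy once, up front. A CPU illustration of the same effect (the index computation here is illustrative, not pySDC's):

import numpy as np

# reductions return array scalars (0-d device arrays under cupy), not Python ints
Nyquist_mode_index = np.argmin(np.abs(np.fft.fftfreq(8) + 0.5))
print(type(Nyquist_mode_index))  # a numpy integer here; a 0-d array under cupy

line = int(Nyquist_mode_index)   # explicit cast yields a host-side Python int
print(type(line), line)          # <class 'int'> 4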

@@ -434,7 +434,7 @@ def compute_max_step_size(P, u):

if hasattr(P, 'comm'):
max_step_size = P.comm.allreduce(max_step_size, op=MPI.MIN)
-    return max_step_size
+    return float(max_step_size)

def get_new_step_size(self, controller, step, **kwargs):
if not CheckConvergence.check_convergence(step):
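
The float(...) cast in compute_max_step_size has the same motivation as the int(...) cast above: under cupy, the reduction that produces max_step_size yields a 0-d device array, and the step-size logic downstream is safer with a host-side Python float. A CPU sketch of the cast (values are illustrative):

import numpy as np

dx = np.array([0.1, 0.05, 0.2])
cfl = 0.8

max_step_size = cfl * dx.min()        # numpy scalar here; 0-d array under cupy
max_step_size = float(max_step_size)  # a plain Python float on CPU and GPU alike
print(type(max_step_size))            # <class 'float'>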
52 changes: 47 additions & 5 deletions pySDC/implementations/problem_classes/generic_spectral.py
@@ -18,8 +18,6 @@ def setup_GPU(cls):
from pySDC.implementations.datatype_classes.cupy_mesh import cupy_mesh, imex_cupy_mesh
from pySDC.implementations.datatype_classes.mesh import mesh, imex_mesh

cls.xp = cp

cls.dtype_u = cupy_mesh

GPU_versions = {
@@ -183,8 +181,6 @@ def solve_system(self, rhs, dt, u0=None, *args, skip_itransform=False, **kwargs)
Note that in implicit Euler, the right hand side will be composed of the initial conditions. We don't want that in lines that don't depend on time. Therefore, we multiply the right hand side by the mass matrix. This means you can only do algebraic conditions that add up to zero. But you can easily overload this function with something more generic if needed.
"""
if dt == 0:
return rhs

sp = self.spectral.sparse_lib
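
The docstring's mass-matrix remark can be made concrete with a toy system: rows of the mass matrix M that are zero encode algebraic conditions, and premultiplying the right-hand side by M zeroes it in exactly those rows, so the constraint holds independently of the initial conditions. A small worked sketch (a hand-rolled dense example, not the pySDC implementation):

import numpy as np

# implicit Euler for M u_t = L u: solve (M - dt L) u = M rhs.
# The zero row of M makes the last equation purely algebraic (u2 = u0 + u1),
# and M @ rhs wipes out rhs[2], so the condition adds up to zero as required.
M = np.diag([1.0, 1.0, 0.0])
L = np.array([[-1.0, 0.0, 0.0],
              [0.0, -2.0, 0.0],
              [1.0, 1.0, -1.0]])
dt = 0.1
rhs = np.array([1.0, 2.0, 5.0])  # rhs[2] is ignored thanks to M @ rhs

u = np.linalg.solve(M - dt * L, M @ rhs)
print(u, u[2] - (u[0] + u[1]))   # constraint residual is (numerically) zero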

@@ -249,7 +245,6 @@ def solve_system(self, rhs, dt, u0=None, *args, skip_itransform=False, **kwargs)
if self.spectral_space:
return sol_hat
else:

sol = self.spectral.u_init
sol[:] = self.spectral.itransform(sol_hat).real

@@ -313,3 +308,50 @@ def compute_residual_DAE(self, stage=''):
L.status.updated = False

return None


def compute_residual_DAE_MPI(self, stage=None):
"""
Computation of the residual using the collocation matrix Q
Args:
stage (str): The current stage of the step the level belongs to
"""
from mpi4py import MPI

L = self.level

# Check if we want to skip the residual computation to gain performance
# Keep in mind that skipping any residual computation is likely to give incorrect outputs of the residual!
if stage in self.params.skip_residual_computation:
L.status.residual = 0.0 if L.status.residual is None else L.status.residual
return None

# compute the residual for each node

# build QF(u)
res = self.integrate(last_only=L.params.residual_type[:4] == 'last')
mask = L.prob.diff_mask
res[mask] += L.u[0][mask] - L.u[self.rank + 1][mask]
# add tau if associated
if L.tau[self.rank] is not None:
res += L.tau[self.rank]
# use abs function from data type here
res_norm = abs(res)

# find maximal residual over the nodes
if L.params.residual_type == 'full_abs':
L.status.residual = self.comm.allreduce(res_norm, op=MPI.MAX)
elif L.params.residual_type == 'last_abs':
L.status.residual = self.comm.bcast(res_norm, root=self.comm.size - 1)
elif L.params.residual_type == 'full_rel':
L.status.residual = self.comm.allreduce(res_norm / abs(L.u[0]), op=MPI.MAX)
elif L.params.residual_type == 'last_rel':
L.status.residual = self.comm.bcast(res_norm / abs(L.u[0]), root=self.comm.size - 1)
else:
raise NotImplementedError(f'residual type \"{L.params.residual_type}\" not implemented!')

# indicate that the residual has seen the new values
L.status.updated = False

return None
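
The four residual_type branches differ only in which node's norm survives (a MAX over all nodes via allreduce, or the last node's value broadcast from the last rank) and whether it is scaled by the norm of the initial value. A serial sketch of the same selection, with the MPI collectives replaced by operations over a list of per-node norms:

def select_residual(res_norms, u0_norm, residual_type):
    # res_norms: one residual norm per collocation node (in rank order)
    if residual_type == 'full_abs':
        return max(res_norms)                       # allreduce(MAX) over nodes
    elif residual_type == 'last_abs':
        return res_norms[-1]                        # bcast from the last rank
    elif residual_type == 'full_rel':
        return max(r / u0_norm for r in res_norms)
    elif residual_type == 'last_rel':
        return res_norms[-1] / u0_norm
    raise NotImplementedError(f'residual type "{residual_type}" not implemented!')

print(select_residual([1e-3, 5e-4, 2e-6], 2.0, 'last_rel'))  # 1e-06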
