Skip to content

Commit

Permalink
docs: Use switchconfig and update FAQ
Browse files Browse the repository at this point in the history
  • Loading branch information
georgebisbas committed Apr 18, 2024
1 parent 5735e62 commit 8e6ab03
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 19 deletions.
2 changes: 1 addition & 1 deletion FAQ.md
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,7 @@ By default, Devito compiles the generated code using flags that maximize the run

[top](#Frequently-Asked-Questions)

## Can I control the MPI domain decomposition
## Can I control the MPI domain decomposition?

Up to and including Devito v3.5, domain decomposition occurred along the fastest axis. In later versions, domain decomposition occurs along the slowest axis, for performance reasons. And yes, it is possible to control the domain decomposition in user code, although this is not yet well documented. Take a look at `class CustomTopology` in [distributed.py](https://github.com/devitocodes/devito/blob/master/devito/mpi/distributed.py) and `test_custom_topology` in [this file](https://github.com/devitocodes/devito/blob/master/tests/test_mpi.py). In essence, `Grid` accepts the optional argument `topology`, which allows the user to pass a custom topology as an n-tuple, where `n` is the number of distributed dimensions. For example, for a two-dimensional grid, the topology `(4, 1)` will decompose the slowest axis into four partitions, one partition per MPI rank, while the fastest axis will be replicated over all MPI ranks.

Expand Down
2 changes: 1 addition & 1 deletion devito/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def set_log_level(level, comm=None):
"""
from devito import configuration

if comm is not None:
if comm is not None and configuration['mpi']:
if comm.rank != 0:
logger.removeHandler(stream_handler)
logger.addHandler(logging.NullHandler())
Expand Down
24 changes: 8 additions & 16 deletions devito/operator/operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from cached_property import cached_property
from sympy import sympify

from devito import switchconfig
from devito.arch import compiler_registry, platform_registry
from devito.data import default_allocator
from devito.exceptions import InvalidOperator
Expand Down Expand Up @@ -871,8 +872,10 @@ def apply(self, **kwargs):
# Post-process runtime arguments
self._postprocess_arguments(args, **kwargs)

# Output summary of performance achieved
return self._emit_apply_profiling(args)
# Output summary of performance achieved and
# temporarily drop MPI for printing arguments
with switchconfig(mpi=False):
return self._emit_apply_profiling(args)

# Performance profiling

Expand Down Expand Up @@ -910,10 +913,8 @@ def _emit_timings(timings, indent=''):
def _emit_apply_profiling(self, args):
"""Produce a performance summary of the profiled sections."""

# In case 'MPI0' is selected for logging, restrict result printing to one rank
temp = configuration['log-level']
if configuration['mpi']:
set_log_level(configuration['log-level'], comm=args.comm)
# In case MPI is used restrict result logging to one rank
set_log_level('DEBUG', comm=args.comm)

# Rounder to 2 decimal places
fround = lambda i: ceil(i * 100) / 100
Expand Down Expand Up @@ -1008,16 +1009,7 @@ def lower_perfentry(v):
if a in args:
perf_args[a] = args[a]
break

if configuration['mpi']:
perf("Performance[mode=%s, mpi=%s] arguments: %s, " %
(self._mode, configuration['mpi'], perf_args))
else:
perf("Performance[mode=%s] arguments: %s" % (self._mode, perf_args))

# Restore logging configuration to all ranks
if configuration['mpi']:
set_log_level(temp, comm=None)
perf("Performance[mode=%s] arguments: %s" % (self._mode, perf_args))

return summary

Expand Down
2 changes: 1 addition & 1 deletion devito/operator/profiling.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@ def add_glb_vanilla(self, key, time):
ops = sum(v.ops for v in self.input.values())
traffic = sum(v.traffic for v in self.input.values())

if np.isnan(traffic):
if np.isnan(traffic) or traffic == 0:
return

gflops = float(ops)/10**9
Expand Down

0 comments on commit 8e6ab03

Please sign in to comment.